Java Code Examples for org.apache.commons.io.input.BOMInputStream

The following are the top-voted examples showing how to use org.apache.commons.io.input.BOMInputStream. These examples are extracted from open source projects. You can vote up the examples you like, and your votes will be used in our system to surface more good examples.
Example 1
Project: Gargoyle   File: XmlFileReadModel.java   View source code 6 votes vote down vote up
@Override
public List<String> readLines(URL url) throws IOException {

	ByteArrayOutputStream out = new ByteArrayOutputStream();
	
	try (InputStream in = new BOMInputStream(url.openStream());) {

		int tmp = -1;
		while ((tmp = in.read()) != -1) {
			out.write(tmp);
		}
	}

	String string = out.toString();
	LOGGER.debug(string);
	XMLDiffFormatter xmlFormatter = new XMLDiffFormatter();
	String format = xmlFormatter.format(string);

	return Stream.of(format.split("\n")).collect(Collectors.toList());
}
 
Example 2
Project: DigitalMediaServer   File: PlaylistFolder.java   View source code 6 votes vote down vote up
private BufferedReader getBufferedReader() throws IOException {
	String extension;
	Charset charset;
	if (FileUtil.isUrl(uri)) {
		extension = FileUtil.getUrlExtension(uri).toLowerCase(PMS.getLocale());
	} else {
		extension = FileUtil.getExtension(uri).toLowerCase(PMS.getLocale());
	}
	if (extension != null && (extension.equals("m3u8") || extension.equals(".cue"))) {
		charset = StandardCharsets.UTF_8;
	} else {
		charset = StandardCharsets.ISO_8859_1;
	}
	if (FileUtil.isUrl(uri)) {
		return new BufferedReader(new InputStreamReader(new BOMInputStream(new URL(uri).openStream()), charset));
	} else {
		File playlistfile = new File(uri);
		if (playlistfile.length() < 10000000) {
			return new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(playlistfile)), charset));
		}
	}
	return null;
}
 
Example 3
Project: mojito   File: CommandHelper.java   View source code 6 votes vote down vote up
/**
 * Get content from {@link java.nio.file.Path} using UTF8
 *
 * @param path
 * @return
 * @throws CommandException
 */
public String getFileContent(Path path) throws CommandException {
    try {
        File file = path.toFile();
        BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(file), false, boms);
        String fileContent;
        if (inputStream.hasBOM()) {
            fileContent = IOUtils.toString(inputStream, inputStream.getBOMCharsetName());
        } else {
            fileContent = IOUtils.toString(inputStream, StandardCharsets.UTF_8);
        }
        return fileContent;
    } catch (IOException e) {
        throw new CommandException("Cannot get file content for path: " + path.toString(), e);
    }
}
 
Example 4
Project: dss   File: TestBOM.java   View source code 6 votes vote down vote up
@Test
public void test() throws IOException {
	ApacheCommonsUtils acu = new ApacheCommonsUtils();

	FileInputStream fis = new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml"));
	FileInputStream fisBom = new FileInputStream(new File("src/test/resources/lotl_utf-8.xml"));

	assertNotEquals(acu.toBase64(acu.toByteArray(fis)), acu.toBase64(acu.toByteArray(fisBom)));

	fis = new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml"));
	fisBom = new FileInputStream(new File("src/test/resources/lotl_utf-8.xml"));

	BOMInputStream bomIS = new BOMInputStream(fis);
	BOMInputStream bomISSkipped = new BOMInputStream(fisBom);

	assertEquals(acu.toBase64(acu.toByteArray(bomIS)), acu.toBase64(acu.toByteArray(bomISSkipped)));
}
 
Example 5
Project: crawler-commons   File: SiteMapParser.java   View source code 6 votes vote down vote up
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no
 * priorities, last mods, etc.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param stream
 *            content stream
 * @return The site map
 * @throws IOException
 *             if there is an error reading in the site map content
 */
protected SiteMap processText(URL sitemapUrl, InputStream stream) throws IOException {
    LOG.debug("Processing textual Sitemap");

    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);

    BOMInputStream bomIs = new BOMInputStream(stream);
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, UTF_8));

    String line;
    int i = 1;
    while ((line = reader.readLine()) != null) {
        if (line.length() > 0 && i <= MAX_URLS) {
            addUrlIntoSitemap(line, textSiteMap, null, null, null, i++);
        }
    }
    textSiteMap.setProcessed(true);

    return textSiteMap;
}
 
Example 6
Project: crawler-commons   File: SiteMapParser.java   View source code 6 votes vote down vote up
/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * 
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException
 *             if there is an error parsing the gzip
 * @throws IOException
 *             if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzippedXML(URL url, byte[] response) throws IOException, UnknownFormatException {

    LOG.debug("Processing gzipped XML");

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");
    LOG.debug("XML url = {}", xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    InputSource in = new InputSource(decompressed);
    in.setSystemId(xmlUrl);
    return processXml(url, in);
}
 
Example 7
Project: easyjasub   File: InputTextSubFile.java   View source code 6 votes vote down vote up
public InputTextSubFile(SubtitleFileType inputFormat, String fileName,
		InputStream is) throws InputTextSubException, IOException {
	try {
		tto = createFormat(inputFormat).parseFile(fileName,
				new BOMInputStream(is));
	} catch (FatalParsingException ex) {
		throw new InputTextSubException(
				"Parse error returned by subtitle read library", ex);
	}
	captions = new ArrayList<InputSubtitleLine>(tto.captions.size());
	for (Caption caption : tto.captions.values()) {
		InputSubtitleLine line = new InputSubtitleLine();
		line.setContent(caption.content);
		line.setStartTime(new SubtitleFileTimeWrapper(caption.start)
				.getMSeconds());
		line.setEndTime(new SubtitleFileTimeWrapper(caption.end)
				.getMSeconds());
		captions.add(line);
	}
}
 
Example 8
Project: org.fastnate   File: AbstractCsvReader.java   View source code 6 votes vote down vote up
/**
 * Opens a CSV file.
 *
 * If the given file ends with "gz", then the file is decompressed before using a {@link GZIPInputStream}.
 *
 * @param importFile
 *            the csv file
 * @return a list reader
 * @throws IOException
 *             on io exception
 */
@SuppressWarnings("resource")
protected CsvListReader openCsvListReader(final File importFile) throws IOException {
	// Open file
	InputStream fileStream = new FileInputStream(importFile);

	// Check for compressed file
	if (importFile.getName().toLowerCase().endsWith(".gz")) {
		fileStream = new GZIPInputStream(fileStream);
	}

	// Guess the encoding
	final BOMInputStream inputStream = new BOMInputStream(fileStream, false, ByteOrderMark.UTF_8,
			ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
	final String charset;
	if (inputStream.hasBOM()) {
		charset = inputStream.getBOMCharsetName();
		log.info("BOM detected. Using {} as encoding", charset);
	} else {
		charset = getDefaultEncoding().toString();
		log.info("No BOM detected. Assuming {} as encoding", charset);
	}
	final Reader reader = new InputStreamReader(inputStream, charset);
	return new CsvListReader(reader, new CsvPreference.Builder(CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE)
			.skipComments(new CommentMatches("(//|/\\*|#|;).*")).build());
}
 
Example 9
Project: dclib   File: ConverterService.java   View source code 6 votes vote down vote up
/**
 * Simple invocation. Load template and data from a file, run process
 * and return memory model containing results or null if there was a problem.
 * Problems/progress reporting live to given reporter
 * @param templateFile the name of the template file to use
 * @param dataFile  the name of the data file to process
 * @param report the message reporter
 * @param debug set to true to enable voluminous debug message
 * @param allowNullRows set to true to allow output even if some rows don't match
 * @throws IOException 
 */
public Model simpleConvert(String templateFile, String dataFile, ProgressMonitorReporter reporter, boolean debug, boolean allowNullRows) throws IOException {
    Template template = TemplateFactory.templateFrom(templateFile, dc);
    
    File dataFileF = new File(dataFile);
    String filename = dataFileF.getName();
    String filebasename = NameUtils.removeExtension(filename);
    put(ConverterProcess.FILE_NAME, filename);
    put(ConverterProcess.FILE_BASE_NAME, filebasename);
    InputStream is = new BOMInputStream( new FileInputStream(dataFileF) );
    
    ConverterProcess process = new ConverterProcess(dc, is);
    process.setDebug(debug);
    process.setTemplate( template );
    process.setMessageReporter( reporter );
    process.setAllowNullRows(allowNullRows);
    boolean ok = process.process();
    
    return ok ?  process.getModel() : null;
}
 
Example 10
Project: rosa   File: JsonldJenaUtils.java   View source code 6 votes vote down vote up
/**
 * Generate a single Jena model from several different files, output it to 
 * specified OutputStream
 * @param aggr String[] String array containing all relevant RDF files "name.extension"
 * @param out OutputStream
 * @param type an instance of ScDemoFile class
 * @throws IOException
 */
public static Model generateAggregateModel(String[] aggr, String lang) 
		throws IOException {
	
	Model model = ModelFactory.createDefaultModel();
	Model subModel = ModelFactory.createDefaultModel();
	
	for (int i=0; i<aggr.length; i++) {
		InputStream in = (JsonldJenaUtils.class).getClassLoader().getResourceAsStream(
				aggr[i]);
		BOMInputStream bIn = new BOMInputStream(in, false);
		
		subModel.read(bIn, null, lang);
		model = model.add(subModel);
		
		subModel.removeAll();
		bIn.close();
		in.close();
	}
	
	return model;
	
}
 
Example 11
Project: elasticsearch-river-remote   File: SiteMapParser.java   View source code 6 votes vote down vote up
/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * 
 * @param url - URL of the gzipped content
 * @param response - Gzipped content
 * @throws MalformedURLException
 * @throws IOException
 * @throws UnknownFormatException
 */
private AbstractSiteMap processGzip(URL url, byte[] response) throws MalformedURLException, IOException,
		UnknownFormatException {

	logger.debug("Processing gzip");

	AbstractSiteMap smi;

	InputStream is = new ByteArrayInputStream(response);

	// Remove .gz ending
	String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

	logger.debug("XML url = " + xmlUrl);

	BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
	InputSource in = new InputSource(decompressed);
	in.setSystemId(xmlUrl);
	smi = processXml(url, in);
	decompressed.close();
	return smi;
}
 
Example 12
Project: file-type-plugin   File: FileType.java   View source code 5 votes vote down vote up
private String showByteOfMark(InputStream source) throws IOException {
  ByteOrderMark detectedBOM = new BOMInputStream(source).getBOM();
  if (detectedBOM == null) {
    return "";
  }
  String bom = detectedBOM.toString();
  FileType.logger.log(Level.INFO, "BOM: {0}", bom);
  return " w/ " + bom;
}
 
Example 13
Project: instalint   File: FileMetadata.java   View source code 5 votes vote down vote up
private static InputStream streamFile(File file) {
  try {
    return new BOMInputStream(new FileInputStream(file),
      ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
  } catch (FileNotFoundException e) {
    throw new IllegalStateException("File not found: " + file.getAbsolutePath(), e);
  }
}
 
Example 14
Project: file-format-streaming-converter   File: XlsxToCsvConverterTest.java   View source code 5 votes vote down vote up
private CSVParser createCsvParser(String inputFileName, String delimiter) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(inputFileName)), Charsets.UTF_8));
    CSVFormat format = CSVFormat.newFormat(delimiter.charAt(0))
            .withSkipHeaderRecord()
            .withIgnoreEmptyLines()
            .withAllowMissingColumnNames()
            .withQuote('"')
            .withHeader();
    return new CSVParser(reader, format);
}
 
Example 15
Project: jijimaku   File: SubtitleFile.java   View source code 5 votes vote down vote up
public SubtitleFile(String fileName, String fileContents, String stylesStr) throws IOException, FatalParsingException {
  LOGGER.debug("Parsing subtitle file {}", fileName);

  TimedTextFileFormat timedTextFormat;
  switch (FilenameUtils.getExtension(fileName)) {
    case "ass":
      timedTextFormat = new FormatASS();
      break;
    case "srt":
      timedTextFormat = new FormatSRT();
      break;
    default:
      LOGGER.error("invalid subtitle file extension file: {}", fileName);
      throw new UnexpectedError();
  }

  // Convert String to InputStream to match subtitleFile API
  byte[] byteData = fileContents.getBytes("UTF-8");
  // Must use BOMInputStream otherwise files with BOM will broke :(((
  // => http://stackoverflow.com/questions/4897876/reading-utf-8-bom-marker
  try (BOMInputStream inputStream = new BOMInputStream(new ByteArrayInputStream(byteData))) {
    timedText = timedTextFormat.parseFile(fileName, inputStream, StandardCharsets.UTF_8);
  }

  if (timedText.warnings.length() > "List of non fatal errors produced during parsing:\n\n".length()) {
    LOGGER.warn("There was some warnings during parsing. See logs.");
    LOGGER.debug("Got warnings: {}", "\n" + timedText.warnings);
  }

  styles = parseStyles(stylesStr);
  timedText.styling = styles;
  timedText.description = JIJIMAKU_SIGNATURE;
  annotationCaptions = new TreeMap<>();

  // Initialization: add jijimaku mark and set style to Default
  addJijimakuMark();
  timedText.captions.values().stream().forEach(c -> c.style = styles.get("Default"));

  captionIter = timedText.captions.entrySet().iterator();
}
 
Example 16
Project: mojito   File: CommandHelper.java   View source code 5 votes vote down vote up
/**
 * Writes the content into a file using same format as source file
 *
 * @param content content to be written
 * @param path path to the file
 * @param sourceFileMatch
 * @throws CommandException
 */
public void writeFileContent(String content, Path path, FileMatch sourceFileMatch) throws CommandException {
    try {
        File outputFile = path.toFile();
        BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(sourceFileMatch.getPath().toFile()), false, boms);
        if (inputStream.hasBOM()) {
            FileUtils.writeByteArrayToFile(outputFile, inputStream.getBOM().getBytes());
            FileUtils.writeByteArrayToFile(outputFile, content.getBytes(inputStream.getBOMCharsetName()), true);
        } else {
            FileUtils.writeStringToFile(outputFile, content, StandardCharsets.UTF_8);
        }
    } catch (IOException e) {
        throw new CommandException("Cannot write file content in path: " + path.toString(), e);
    }
}
 
Example 17
Project: georocket   File: MimeTypeUtils.java   View source code 5 votes vote down vote up
/**
 * Read the first bytes of the given file and try to determine the file
 * format. Read up to 100 KB before giving up.
 * @param f the file to read
 * @return the file format (or <code>null</code> if the format
 * could not be determined)
 * @throws IOException if the input stream could not be read
 */
public static String detect(File f) throws IOException {
  if (!f.exists()) {
    return null;
  }
  try (BufferedInputStream bis = new BufferedInputStream(new BOMInputStream(
      new FileInputStream(f)))) {
    return determineFileFormat(bis);
  }
}
 
Example 18
Project: Open-Clinica-Data-Uploader   File: UploadController.java   View source code 5 votes vote down vote up
private Path saveFile(MultipartFile file) throws IOException {
    // Get the filename and build the local file path
    String filename = file.getOriginalFilename();
    String directory = System.getProperty("java.io.tmpdir");
    String filepath = Paths.get(directory, filename).toString();

    // Save the file locally
    try (BufferedOutputStream stream =
                 new BufferedOutputStream(new FileOutputStream(new File(filepath)));
         BOMInputStream bis = new BOMInputStream(file.getInputStream(), false)) {
        IOUtils.copy(bis, stream);
    }
    return Paths.get(filepath);
}
 
Example 19
Project: webz-server   File: FileDownloaderWithBOM.java   View source code 5 votes vote down vote up
public FileDownloaderWithBOM(WebzInputStreamDownloader downloader, String defaultEncoding) throws IOException, WebzException {

		this.bomIn = (BOMInputStream) new BOMInputStream(downloader.getInputStream(), false, ALL_BOMS);
		this.downloader = new FileDownloader(downloader.getFileSpecific(), bomIn);
		ByteOrderMark bom = bomIn.getBOM();

		if (bom == null) {
			actualEncoding = defaultEncoding;
			actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes();
		} else {
			actualEncoding = bom.getCharsetName();
			actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes() - bom.length();
		}
		reader = new InputStreamReader(bomIn, actualEncoding);
	}
 
Example 20
Project: spring-usc   File: EncodingDetector.java   View source code 5 votes vote down vote up
public static InputStreamReader getInputStreamReader(File file, String encoding) throws IOException {
    
    FileInputStream fis = new FileInputStream(file);
    logger.debug("Reading file: " + file + " using encoding: " + encoding);
    BOMInputStream bis = new BOMInputStream(fis); //So that we can remove the BOM
    return new InputStreamReader(bis, encoding);
}
 
Example 21
Project: dwca-io   File: DwcMetaFiles.java   View source code 5 votes vote down vote up
/**
 * Read the provided meta descriptor (e.g. meta.xml) and return a {@link Archive}.
 * @param metaDescriptor
 * @throws SAXException
 * @throws IOException
 * @throws UnsupportedArchiveException
 * @return a new {@link Archive}, never null
 */
public static Archive fromMetaDescriptor(InputStream metaDescriptor) throws SAXException, IOException, UnsupportedArchiveException {
  Archive archive = new Archive();
  try (BOMInputStream bomInputStream = new BOMInputStream(metaDescriptor)) {
    SAXParser p = SAX_FACTORY.newSAXParser();
    MetaXMLSaxHandler mh = new MetaXMLSaxHandler(archive);
    p.parse(bomInputStream, mh);
  } catch (ParserConfigurationException e) {
    throw new SAXException(e);
  }
  return archive;
}
 
Example 22
Project: digidoc4j   File: AsicContainerParser.java   View source code 5 votes vote down vote up
private void extractMimeType(ZipEntry entry) {
  try {
    InputStream zipFileInputStream = getZipEntryInputStream(entry);
    BOMInputStream bomInputStream = new BOMInputStream(zipFileInputStream);
    DSSDocument document = new InMemoryDocument(bomInputStream);
    mimeType = StringUtils.trim(IOUtils.toString(getDocumentBytes(document), "UTF-8"));
    extractAsicEntry(entry, document);
  } catch (IOException e) {
    logger.error("Error parsing container mime type: " + e.getMessage());
    throw new TechnicalException("Error parsing container mime type: " + e.getMessage(), e);
  }
}
 
Example 23
Project: srclib-java   File: Resolver.java   View source code 5 votes vote down vote up
/**
 * Tries to fetch POM model from maven central for a given dependency
 * @param dependency dependency to fetch model to
 * @return POM model if found and valid
 * @throws IOException
 * @throws XmlPullParserException
 */
private static Model fetchModel(RawDependency dependency)
        throws IOException, XmlPullParserException {

    // Get the url to the POM file for this artifact
    String url = "http://central.maven.org/maven2/"
            + dependency.groupID.replace('.', '/') + '/' + dependency.artifactID + '/'
            + dependency.version + '/' + dependency.artifactID + '-' + dependency.version + ".pom";
    InputStream input = new BOMInputStream(new URL(url).openStream());

    MavenXpp3Reader xpp3Reader = new MavenXpp3Reader();
    Model model = xpp3Reader.read(input);
    input.close();
    return model;
}
 
Example 24
Project: commons-csv   File: CSVParserTest.java   View source code 5 votes vote down vote up
@Test
public void testBOMInputStream_ParserWithInputStream() throws IOException {
    try (final BOMInputStream inputStream = createBOMInputStream("CSVFileParser/bom.csv");
            final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
        for (final CSVRecord record : parser) {
            final String string = record.get("Date");
            Assert.assertNotNull(string);
            // System.out.println("date: " + record.get("Date"));
        }
    }
}
 
Example 25
Project: es6draft   File: ChakraTest.java   View source code 5 votes vote down vote up
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    ByteOrderMark bom = bis.getBOM();
    if (ByteOrderMark.UTF_8.equals(bom)) {
        return StandardCharsets.UTF_8;
    }
    if (ByteOrderMark.UTF_16LE.equals(bom)) {
        return StandardCharsets.UTF_16LE;
    }
    if (ByteOrderMark.UTF_16BE.equals(bom)) {
        return StandardCharsets.UTF_16BE;
    }
    return StandardCharsets.UTF_8;
}
 
Example 26
Project: olca-modules   File: AbstractImport.java   View source code 5 votes vote down vote up
public void run(File file, Seq seq, IDatabase database) throws Exception {
	this.seq = seq;
	this.database = database;
	CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build();
	try (FileInputStream fis = new FileInputStream(file);
			// exclude the byte order mark, if there is any
			BOMInputStream bom = new BOMInputStream(fis, false,
					ByteOrderMark.UTF_8);
			InputStreamReader reader = new InputStreamReader(bom, "utf-8");
			BufferedReader buffer = new BufferedReader(reader);
			CsvListReader csvReader = new CsvListReader(buffer, pref)) {
		importFile(csvReader, database);
	}
}
 
Example 27
Project: olca-modules   File: Maps.java   View source code 5 votes vote down vote up
private static CsvListReader createReader(InputStream stream)
		throws Exception {
	CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build();
	// exclude the byte order mark, if there is any
	BOMInputStream bom = new BOMInputStream(stream, false,
			ByteOrderMark.UTF_8);
	InputStreamReader reader = new InputStreamReader(bom, "utf-8");
	BufferedReader buffer = new BufferedReader(reader);
	CsvListReader csvReader = new CsvListReader(buffer, pref);
	return csvReader;
}
 
Example 28
Project: storm-crawler   File: CharsetIdentification.java   View source code 5 votes vote down vote up
/**
 * Detects any BOMs and returns the corresponding charset
 */
private static String getCharsetFromBOM(final byte[] byteData) {
    BOMInputStream bomIn = new BOMInputStream(new ByteArrayInputStream(
            byteData));
    try {
        ByteOrderMark bom = bomIn.getBOM();
        if (bom != null) {
            return bom.getCharsetName();
        }
    } catch (IOException e) {
        return null;
    }
    return null;
}
 
Example 29
Project: pentaho-kettle   File: CsvInput.java   View source code 5 votes vote down vote up
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws KettleException {
  String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject fileObject = KettleVFS.getFileObject( fileName, getTransMeta() );
      BOMInputStream inputStream =
          new BOMInputStream( KettleVFS.getInputStream( fileObject ), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
              ByteOrderMark.UTF_16BE ) ) {
    InputStreamReader reader = null;
    if ( Utils.isEmpty( realEncoding ) ) {
      reader = new InputStreamReader( inputStream );
    } else {
      reader = new InputStreamReader( inputStream, realEncoding );
    }
    EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
    String line =
        TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder(
            1000 ) );
    String[] fieldNames =
        CsvInput.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );
    if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
      removeEnclosure( fieldNames, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( fieldNames );
    return fieldNames;
  } catch ( IOException e ) {
    throw new KettleFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
 
Example 30
Project: elasticsearch-river-remote   File: SiteMapParser.java   View source code 5 votes vote down vote up
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no priorities, last mods, etc.
 * 
 * @param content
 * @throws IOException
 */
private SiteMap processText(byte[] content, String sitemapUrl) throws IOException {

	logger.debug("Processing textual Sitemap");

	SiteMap textSiteMap = new SiteMap(sitemapUrl);
	textSiteMap.setType(SitemapType.TEXT);

	BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
	@SuppressWarnings("resource")
	BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs));

	String line;

	int i = 1;
	while ((line = reader.readLine()) != null) {
		if (line.length() > 0 && i <= MAX_URLS) {
			try {
				URL url = new URL(line);
				boolean valid = urlIsLegal(textSiteMap.getBaseUrl(), url.toString());

				if (valid || !strict) {
					if (logger.isDebugEnabled()) {
						StringBuffer sb = new StringBuffer("  ");
						sb.append(i).append(". ").append(url);
						logger.debug(sb.toString());
					}
					i++;
					SiteMapURL surl = new SiteMapURL(url, valid);
					textSiteMap.addSiteMapUrl(surl);
				}
			} catch (MalformedURLException e) {
				logger.debug("Bad URL [" + line + "].");
			}
		}
	}
	textSiteMap.setProcessed(true);
	return textSiteMap;
}
 
Example 31
Project: languagetool   File: Main.java   View source code 5 votes vote down vote up
private void loadFile(File file) {
  try (FileInputStream inputStream = new FileInputStream(file)) {
    BOMInputStream bomIn = new BOMInputStream(inputStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
    String charsetName;
    if (bomIn.hasBOM()) {
      bom = bomIn.getBOM();
      charsetName = bom.getCharsetName();
    } else {
      // No BOM found
      bom = null;
      charsetName = null;
    }
    String fileContents = StringTools.readStream(bomIn, charsetName);
    textArea.setText(fileContents);
    currentFile = file;
    updateTitle();
    if(recentFiles.contains(file.getAbsolutePath())) {
      recentFiles.remove(file.getAbsolutePath());
    }
    recentFiles.add(file.getAbsolutePath());
    localStorage.saveProperty("recentFiles", recentFiles);
    updateRecentFilesMenu();
  } catch (IOException e) {
    Tools.showError(e);
  }
}
 
Example 32
Project: languagetool   File: Main.java   View source code 5 votes vote down vote up
private InputStreamReader getInputStreamReader(String filename, String encoding) throws IOException {
  String charsetName = encoding != null ? encoding : Charset.defaultCharset().name();
  InputStream is = System.in;
  if (!isStdIn(filename)) {
    is = new FileInputStream(new File(filename));
    BOMInputStream bomIn = new BOMInputStream(is, true, ByteOrderMark.UTF_8,
      ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
      ByteOrderMark.UTF_32BE,ByteOrderMark.UTF_32LE);
    if (bomIn.hasBOM() && encoding == null) {
      charsetName = bomIn.getBOMCharsetName();
    }
    is = bomIn;
  }
  return new InputStreamReader(new BufferedInputStream(is), charsetName);
}
 
Example 33
Project: sparql-generate   File: LocatorURLAccept.java   View source code 4 votes vote down vote up
private TypedInputStream openConnectionCheckRedirects(URLConnection c) throws IOException {
    boolean redir;
    int redirects = 0;
    InputStream in = null;
    String contentType = null;
    String contentEncoding = null;
    do {
        if (c instanceof HttpURLConnection) {
            ((HttpURLConnection) c).setInstanceFollowRedirects(false);
        }
        // We want to open the input stream before getting headers
        // because getHeaderField() et al swallow IOExceptions.
        in = new BufferedInputStream(new BOMInputStream(c.getInputStream()));
        contentType = c.getContentType();
        contentEncoding = c.getContentEncoding();
        redir = false;
        if (c instanceof HttpURLConnection) {
            HttpURLConnection http = (HttpURLConnection) c;
            int stat = http.getResponseCode();
            if (stat >= 300 && stat <= 307 && stat != 306
                    && stat != HttpURLConnection.HTTP_NOT_MODIFIED) {
                URL base = http.getURL();
                String loc = http.getHeaderField("Location");
                URL target = null;
                if (loc != null) {
                    target = new URL(base, loc);
                }
                http.disconnect();
                // Redirection should be allowed only for HTTP and HTTPS
                // and should be limited to 5 redirections at most.
                if (target == null
                        || !(target.getProtocol().equals("http") || target.getProtocol().equals("https"))
                        || c.getURL().getProtocol().equals("https") && target.getProtocol().equals("http")
                        || redirects >= 5) {
                    throw new SecurityException("illegal URL redirect");
                }
                redir = true;
                c = target.openConnection();
                redirects++;
            }
        }
    } while (redir);
    if(contentType==null) {
        contentType = "text/plain";
    }
    return new TypedInputStream(in, contentType, contentEncoding);
}
 
Example 34
Project: AniML   File: DataTable.java   View source code 4 votes vote down vote up
/**
 * Loads a delimited text file into a {@link DataTable} using Apache Commons CSV.
 * The file is decoded as UTF-8 and a leading byte-order mark, if present, is
 * skipped by the {@code BOMInputStream} wrapper.
 *
 * @param fileName         path of the file to read
 * @param formatType       one of "tsv", "mysql", "excel" or "rfc4180" (case-insensitive);
 *                         {@code null} or any other value selects RFC 4180
 * @param colTypesOverride if non-null, used instead of the column types computed from the data
 * @param colNamesOverride if non-null, used instead of the names taken from the header row
 * @param hasHeaderRow     whether the first record holds the column names
 * @return the table built by {@code fromStrings} from the parsed rows
 * @throws IllegalArgumentException if the file cannot be opened, read or parsed;
 *                                  the original exception is attached as the cause
 */
public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride, String[] colNamesOverride, boolean hasHeaderRow) {
	try {
		// Choose the base dialect, then switch on header parsing once instead of per case.
		CSVFormat format;
		if ( formatType==null ) {
			format = CSVFormat.RFC4180;
		}
		else {
			switch ( formatType.toLowerCase() ) {
				case "tsv":
					format = CSVFormat.TDF;
					break;
				case "mysql":
					format = CSVFormat.MYSQL;
					break;
				case "excel":
					format = CSVFormat.EXCEL;
					break;
				case "rfc4180":
				default:
					format = CSVFormat.RFC4180;
					break;
			}
		}
		if ( hasHeaderRow ) {
			format = format.withHeader();
		}

		// use apache commons io + csv to load but convert to list of String[]
		List<String[]> rows = new ArrayList<>();
		int numHeaderNames;
		String[] colNames;
		// try-with-resources closes parser, reader and file stream on every path,
		// including a failure while constructing the parser or reading the header.
		try (FileInputStream fis = new FileInputStream(fileName);
			 Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8");
			 CSVParser parser = new CSVParser(reader, format))
		{
			// NOTE(review): getHeaderMap() is dereferenced unconditionally, exactly as before;
			// confirm it is non-null for the Commons CSV version in use when hasHeaderRow is false.
			Set<String> colNameSet = parser.getHeaderMap().keySet();
			numHeaderNames = colNameSet.size();
			colNames = colNameSet.toArray(new String[colNameSet.size()]);

			for (final CSVRecord record : parser) {
				String[] row = new String[record.size()];
				for (int j = 0; j<record.size(); j++) {
					row[j] = record.get(j);
				}
				rows.add(row);
			}
		}

		VariableType[] actualTypes = computeColTypes(rows, numHeaderNames);

		// Caller-supplied names/types win over what was derived from the file.
		if ( colNamesOverride!=null ) {
			colNames = colNamesOverride;
		}
		if ( colTypesOverride!=null ) {
			actualTypes = colTypesOverride;
		}
		return fromStrings(rows, actualTypes, colNames, false);
	}
	catch (Exception e) {
		throw new IllegalArgumentException("Can't open and/or read "+fileName, e);
	}
}
 
Example 35
Project: AniML   File: DataTable.java   View source code 4 votes vote down vote up
/**
 * Loads a comma-separated file whose column types are already known, converting
 * every field to an {@code int} via {@code getValue(colTypes[col], text)}.
 * The file is decoded as UTF-8 and a leading byte-order mark, if present, is
 * skipped by the {@code BOMInputStream} wrapper. Blank lines are ignored and
 * fields are split on every comma (no quoting support).
 *
 * A data line with more fields than {@code colTypes.length} overruns the row
 * array and fails with an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param fileName     path of the file to read
 * @param colTypes     type of each column; its length fixes the width of every row
 * @param hasHeaderRow whether the first line holds comma-separated column names
 * @return a table built from the parsed rows; column names are {@code null}
 *         when there is no header row or the header line is empty
 * @throws IllegalArgumentException if the file cannot be opened or read
 */
public static DataTable loadCSV(String fileName, VariableType[] colTypes, boolean hasHeaderRow) {
	int numCols = colTypes.length;
	// try-with-resources: the reader and file handle are released on success and on failure.
	try (FileInputStream fis = new FileInputStream(fileName);
		 BufferedReader bf = new BufferedReader(new InputStreamReader(new BOMInputStream(fis), "UTF-8")))
	{
		List<int[]> rows = new ArrayList<>();
		String line;
		String[] colNames = null;
		if ( hasHeaderRow ) {
			line=bf.readLine();
			if ( line!=null ) {
				line = line.trim();
				if ( line.length()>0 ) {
					colNames = line.split(",");
					for (int i = 0; i<colNames.length; i++) {
						colNames[i] = colNames[i].trim();
					}
				}
			}
		}
		int n = 0;
		while ( (line=bf.readLine())!=null ) {
			// progress indicator every 10000 data rows
			if ( n>0 && n % 10000 == 0 ) System.out.println(n);
			line = line.trim();
			if ( line.length()==0 ) continue;
			int[] row = new int[numCols];
			int comma = line.indexOf(',', 0);
			int prev = 0;
			int col = 0;
			// walk the line comma by comma instead of split() to avoid allocating a String[] per row
			while ( comma>=0 ) {
				String v = line.substring(prev, comma);
				row[col] = getValue(colTypes[col], v);

				prev = comma+1;
				comma = line.indexOf(',', comma+1);
				col++;
			}
			// grab last element after last comma
			String lastv = line.substring(prev, line.length());
			row[col] = getValue(colTypes[col], lastv);

			rows.add(row);
			n++;
		}

		return new DataTable(rows, colTypes, colNames, null);
	}
	catch (IOException ioe) {
		throw new IllegalArgumentException("Can't open and/or read "+fileName, ioe);
	}
}
 
Example 36
Project: reference-ccda-validator   File: ReferenceCCDAValidationService.java   View source code 4 votes vote down vote up
/**
 * Reads the uploaded C-CDA file into a string and runs the validators over it
 * in sequence: MDHT first, then Vocabulary, then Content. Vocabulary validation
 * only runs when the MDHT results contain no schema error and the objective
 * allows it; Content validation additionally requires that the objective allows
 * Content validation. Every skipped stage is logged with the reason.
 *
 * @param validationObjective the objective the document is validated against (may be null)
 * @param referenceFileName   passed through unchanged to each validator
 * @param ccdaFile            the uploaded document
 * @return the combined, possibly empty, list of results from the validators that ran
 * @throws RuntimeException if an {@link IOException} occurs while reading or validating
 */
private List<RefCCDAValidationResult> runValidators(String validationObjective, String referenceFileName,
                                                    MultipartFile ccdaFile) throws SAXException, Exception {
    List<RefCCDAValidationResult> collected = new ArrayList<>();
    InputStream uploadStream = null;
    try {
        uploadStream = ccdaFile.getInputStream();
        String documentText = IOUtils.toString(new BOMInputStream(uploadStream));

        // Stage 1: MDHT validation always runs.
        List<RefCCDAValidationResult> mdhtResults =
                doMDHTValidation(validationObjective, referenceFileName, documentText);
        if (mdhtResults != null && !mdhtResults.isEmpty()) {
            logger.info("Adding MDHT results");
            collected.addAll(mdhtResults);
        }

        boolean schemaErrorFound = mdhtResultsHaveSchemaError(mdhtResults);
        boolean vocabularyAllowed = objectiveAllowsVocabularyValidation(validationObjective);
        String objectiveLabel = validationObjective != null ? validationObjective : "null objective";

        if (schemaErrorFound || !vocabularyAllowed) {
            // Either reason alone, or both joined by " and ", explains the skip.
            StringBuilder reason =
                    new StringBuilder("Skipping Vocabulary (and thus Content) validation due to: ");
            if (!vocabularyAllowed) {
                reason.append("validationObjective POSTed: ").append(objectiveLabel);
                if (schemaErrorFound) {
                    reason.append(" and ");
                }
            }
            if (schemaErrorFound) {
                reason.append("C-CDA Schema error(s) found");
            }
            logger.info(reason.toString());
        } else {
            // Stage 2: Vocabulary validation.
            List<RefCCDAValidationResult> vocabResults =
                    doVocabularyValidation(validationObjective, referenceFileName, documentText);
            if (vocabResults != null && !vocabResults.isEmpty()) {
                logger.info("Adding Vocabulary results");
                collected.addAll(vocabResults);
            }

            // Stage 3: Content validation, only for objectives that allow it.
            if (!objectiveAllowsContentValidation(validationObjective)) {
                logger.info("Skipping Content validation due to: "
                        + "validationObjective (" + objectiveLabel
                        + ") is not relevant or valid for Content validation");
            } else {
                List<RefCCDAValidationResult> contentResults =
                        doContentValidation(validationObjective, referenceFileName, documentText);
                if (contentResults != null && !contentResults.isEmpty()) {
                    logger.info("Adding Content results");
                    collected.addAll(contentResults);
                }
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Error getting CCDA contents from provided file", e);
    } finally {
        closeFileInputStream(uploadStream);
    }
    return collected;
}
 
Example 37
Project: gtfs-lib   File: Entity.java   View source code 4 votes vote down vote up
/**
 * The main entry point into an Entity.Loader. Interprets each row of a CSV file within a zip file as a single
 * GTFS entity, and loads them into a table.
 *
 * The table file is looked up as {@code tableName + ".txt"} at the root of the zip. If it is not there,
 * any entry whose name ends with that file name is accepted and a TableInSubdirectoryError is recorded.
 * If no entry is found at all, a MissingTableError is recorded for required tables and the method returns.
 * An EmptyTableError is recorded once when the file has no header line or no data rows.
 *
 * @param zip the zip file from which to read a table
 * @throws IOException if the zip entry cannot be opened or read
 */
public void loadTable(ZipFile zip) throws IOException {
    final String tableFileName = tableName + ".txt";
    ZipEntry entry = zip.getEntry(tableFileName);
    if (entry == null) {
        Enumeration<? extends ZipEntry> entries = zip.entries();
        // check if table is contained within sub-directory
        while (entries.hasMoreElements()) {
            ZipEntry e = entries.nextElement();
            if (e.getName().endsWith(tableFileName)) {
                entry = e;
                feed.errors.add(new TableInSubdirectoryError(tableName, entry.getName().replace(tableFileName, "")));
            }
        }
        if (entry == null) {
            /* This GTFS table did not exist in the zip. Only report it as missing when the
               subdirectory search above also came up empty. */
            if (this.isRequired()) {
                feed.errors.add(new MissingTableError(tableName));
            } else {
                LOG.info("Table {} was missing but it is not required.", tableName);
            }
            return;
        }
    }
    LOG.info("Loading GTFS table {} from {}", tableName, entry);
    InputStream zis = zip.getInputStream(entry);
    // skip any byte order mark that may be present. Files must be UTF-8,
    // but the GTFS spec says that "files that include the UTF byte order mark are acceptable"
    InputStream bis = new BOMInputStream(zis);
    CsvReader reader = new CsvReader(bis, ',', Charset.forName("UTF8"));
    // NOTE(review): the reader is published through this.reader and is not closed here;
    // presumably loadOneRow() reads from it - confirm who is responsible for closing it.
    this.reader = reader;
    boolean hasHeaders = reader.readHeaders();
    if (!hasHeaders) {
        feed.errors.add(new EmptyTableError(tableName));
    }
    while (reader.readRecord()) {
        // reader.getCurrentRecord() is zero-based and does not include the header line, keep our own row count
        if (++row % 500000 == 0) {
            LOG.info("Record number {}", human(row));
        }
        loadOneRow(); // Call subclass method to produce an entity from the current row.
    }
    // A table without headers was already reported above; do not report it a second time.
    if (hasHeaders && row == 0) {
        feed.errors.add(new EmptyTableError(tableName));
    }
}
 
Example 38
Project: eMonocot   File: BOMIgnoringBufferedReaderFactory.java   View source code 4 votes vote down vote up
/**
 * Opens the resource and returns a buffered reader over it in the given encoding,
 * with the stream wrapped in a {@code BOMInputStream} so that a leading byte-order
 * mark is not handed to the reader as data.
 *
 * @param resource the resource to read
 * @param encoding name of the charset used to decode the stream
 * @return a buffered reader; the caller is responsible for closing it
 * @throws UnsupportedEncodingException if {@code encoding} is not a supported charset
 * @throws IOException if the resource cannot be opened
 */
@Override
public BufferedReader create(Resource resource, String encoding)
		throws UnsupportedEncodingException, IOException {
	BOMInputStream bomInputStream = new BOMInputStream(resource.getInputStream());
	try {
		return new BufferedReader(new InputStreamReader(bomInputStream, encoding));
	} catch (UnsupportedEncodingException | RuntimeException e) {
		// No reader was created, so the caller has nothing to close: release the stream here.
		try {
			bomInputStream.close();
		} catch (IOException closeFailure) {
			e.addSuppressed(closeFailure);
		}
		throw e;
	}
}
 
Example 39
Project: powop   File: BOMIgnoringBufferedReaderFactory.java   View source code 4 votes vote down vote up
/**
 * Opens the resource and returns a buffered reader over it in the given encoding,
 * with the stream wrapped in a {@code BOMInputStream} so that a leading byte-order
 * mark is not handed to the reader as data.
 *
 * @param resource the resource to read
 * @param encoding name of the charset used to decode the stream
 * @return a buffered reader; the caller is responsible for closing it
 * @throws UnsupportedEncodingException if {@code encoding} is not a supported charset
 * @throws IOException if the resource cannot be opened
 */
@Override
public BufferedReader create(Resource resource, String encoding)
		throws UnsupportedEncodingException, IOException {
	BOMInputStream bomInputStream = new BOMInputStream(resource.getInputStream());
	try {
		return new BufferedReader(new InputStreamReader(bomInputStream, encoding));
	} catch (UnsupportedEncodingException | RuntimeException e) {
		// No reader was created, so the caller has nothing to close: release the stream here.
		try {
			bomInputStream.close();
		} catch (IOException closeFailure) {
			e.addSuppressed(closeFailure);
		}
		throw e;
	}
}
 
Example 40
Project: spring-usc   File: EncodingDetector.java   View source code 4 votes vote down vote up
/**
 * Builds a reader that decodes the given stream with the given encoding, after
 * wrapping the stream in a {@code BOMInputStream} so that the BOM can be removed.
 * The encoding is logged at debug level.
 *
 * @param is       the stream to read; it is wrapped, not closed, by this method
 * @param encoding name of the charset used to decode the stream
 * @return a reader over the wrapped stream
 * @throws IOException if the reader cannot be created for the given encoding
 */
public static InputStreamReader getInputStreamReader(InputStream is, String encoding) throws IOException {
    logger.debug("Reading stream: using encoding: " + encoding);
    // The BOM wrapper sits between the raw stream and the decoder.
    return new InputStreamReader(new BOMInputStream(is), encoding);
}