/**
 * This file is part of General Entity Annotator Benchmark.
 *
 * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * General Entity Annotator Benchmark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
 */
package org.aksw.gerbil.dataset.datahub;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;

import org.aksw.gerbil.config.GerbilConfiguration;
import org.aksw.gerbil.dataset.AbstractDatasetConfiguration;
import org.aksw.gerbil.dataset.Dataset;
import org.aksw.gerbil.dataset.check.EntityCheckerManager;
import org.aksw.gerbil.dataset.impl.nif.FileBasedNIFDataset;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.semantic.sameas.SameAsRetriever;
import org.apache.jena.riot.Lang;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.client.RestTemplate;

/**
 * Dataset configuration for a NIF dataset that is fetched from a URL and kept
 * as a local file. The local file is looked up first; the dataset is only
 * downloaded when that file does not exist yet. The file is parsed as Turtle.
 */
public class DatahubNIFConfig extends AbstractDatasetConfiguration {

    private static final Logger logger = LoggerFactory.getLogger(DatahubNIFConfig.class);

    /**
     * Configuration key whose value is used as prefix of the local file path;
     * the dataset name is appended to it.
     */
    private static final String DATAHUB_DATASET_FILE_PROPERTY_NAME = "org.aksw.gerbil.datasets.DatahubNIFConfig.datasetFiles";

    /** URL the dataset is downloaded from when no local copy exists. */
    private final String datasetUrl;
    /** HTTP client used for the download. */
    private final RestTemplate rt;

    /**
     * Creates a configuration for the dataset with the given name. The
     * experiment type handed to the super class is always
     * {@link ExperimentType#A2KB}.
     *
     * @param datasetName
     *            name of the dataset; also used as the last part of the local
     *            file path
     * @param datasetUrl
     *            URL from which the dataset is downloaded if it is not
     *            available locally
     * @param couldBeCached
     *            passed through to the super class
     * @param entityCheckerManager
     *            passed through to the super class
     * @param globalRetriever
     *            passed through to the super class
     */
    public DatahubNIFConfig(String datasetName, String datasetUrl, boolean couldBeCached,
            EntityCheckerManager entityCheckerManager, SameAsRetriever globalRetriever) {
        super(datasetName, couldBeCached, ExperimentType.A2KB, entityCheckerManager, globalRetriever);
        this.datasetUrl = datasetUrl;
        rt = new RestTemplate();
    }

    /**
     * Loads the dataset from its local file, downloading the file first if it
     * does not exist.
     *
     * <p>
     * We have to synchronize this method. Otherwise every experiment thread
     * would check the file and try to download the data, or try to use the
     * file even though the download hasn't been completed.
     * </p>
     *
     * @return the initialized dataset read from the local file
     * @throws Exception
     *             if the download, writing the file or initializing the
     *             dataset fails
     */
    @Override
    protected synchronized Dataset loadDataset() throws Exception {
        String nifFile = GerbilConfiguration.getInstance().getString(DATAHUB_DATASET_FILE_PROPERTY_NAME) + getName();
        logger.debug("FILE {}", nifFile);
        File f = new File(nifFile);
        if (!f.exists()) {
            logger.debug("file {} does not exist. need to download", nifFile);
            downloadDataset(Paths.get(nifFile));
        }
        FileBasedNIFDataset dataset = new FileBasedNIFDataset(nifFile, getName(), Lang.TTL);
        dataset.init();
        return dataset;
    }

    /**
     * Downloads the dataset from {@link #datasetUrl} and stores it at the
     * given location.
     *
     * <p>
     * The content is first written to a sibling file with the suffix
     * {@code .part} and only renamed to the final name after it has been
     * written completely. A failed download therefore never leaves a
     * truncated file at the final location, which a later call would
     * otherwise mistake for a complete dataset.
     * </p>
     *
     * @param target
     *            location of the local dataset file
     * @throws IOException
     *             if the server returned no content or the file could not be
     *             written
     */
    private void downloadDataset(Path target) throws IOException {
        String data = rt.getForObject(datasetUrl, String.class);
        if (data == null) {
            throw new IOException(
                    "Got no content from \"" + datasetUrl + "\" for the dataset \"" + getName() + "\".");
        }

        // Resolve against the working directory so that a bare file name still
        // has a parent directory.
        Path absoluteTarget = target.toAbsolutePath();
        Path directory = absoluteTarget.getParent();
        if (directory != null) {
            Files.createDirectories(directory);
        }

        Path partialFile = absoluteTarget.resolveSibling(absoluteTarget.getFileName() + ".part");
        try {
            // The file is parsed as Turtle later on, which is UTF-8 encoded.
            // Do not rely on the platform default charset.
            Files.write(partialFile, data.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE,
                    StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE);
            Files.move(partialFile, absoluteTarget, StandardCopyOption.REPLACE_EXISTING);
        } catch (IOException e) {
            try {
                Files.deleteIfExists(partialFile);
            } catch (IOException cleanupError) {
                e.addSuppressed(cleanupError);
            }
            throw e;
        }
    }
}