/* * Copyright [2016-2020] [George Papadakis ([email protected])] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.scify.jedai.datareader; import org.apache.log4j.BasicConfigurator; import org.scify.jedai.datamodel.EntityProfile; import org.scify.jedai.datareader.entityreader.EntityCSVReader; import java.util.List; import java.util.Set; import org.scify.jedai.datamodel.IdDuplicates; import org.scify.jedai.datareader.groundtruthreader.GtCSVReader; /** * * @author G.A.P. II */ public class TestEntityCSVReader { public static void main(String[] args) { BasicConfigurator.configure(); String mainDirectory = "/home/gap2/Downloads/DBLP-ACM/"; EntityCSVReader csvReader = new EntityCSVReader(mainDirectory + "DBLP2.csv"); csvReader.setAttributeNamesInFirstRow(true); csvReader.setSeparator(','); // csvReader.setAttributesToExclude(new int[]{1}); csvReader.setIdIndex(0); List<EntityProfile> profilesD1 = csvReader.getEntityProfiles(); System.out.println("Entities from Dataset 1\t:\t" + profilesD1.size()); csvReader = new EntityCSVReader(mainDirectory + "ACM.csv"); csvReader.setAttributeNamesInFirstRow(true); csvReader.setSeparator(','); // csvReader.setAttributesToExclude(new int[]{1}); csvReader.setIdIndex(0); List<EntityProfile> profilesD2 = csvReader.getEntityProfiles(); System.out.println("Entities from Dataset 2\t:\t" + profilesD2.size()); GtCSVReader gtCsvReader = new GtCSVReader(mainDirectory + "DBLP-ACM_perfectMapping.csv"); gtCsvReader.setIgnoreFirstRow(true); gtCsvReader.setSeparator(","); Set<IdDuplicates> duplicates = gtCsvReader.getDuplicatePairs(profilesD1, profilesD2); System.out.println("Duplicates\t:\t" + duplicates.size()); } }