/******************************************************************************* * Copyright 2014 A3 lab (Dipartimento di Informatica, Università di Pisa) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package it.acubelab.tagme.preprocessing; import it.acubelab.PLogger; import it.acubelab.PLogger.Step; import it.acubelab.tagme.config.TagmeConfig; import it.acubelab.tagme.config.Config.RepositoryDirs; import it.acubelab.tagme.preprocessing.support.AllWIDs; import it.unimi.dsi.fastutil.ints.IntSet; import java.io.IOException; import org.apache.lucene.index.IndexReader; public class TopicDocs extends Dataset<int[]> { public TopicDocs(String lang) { super(lang); } @Override protected int[] parseSet() throws IOException { IntSet WIDs = new AllWIDs(lang).getDataset(); int max_wid = 0; for(int wid: WIDs) if (wid > max_wid) max_wid = wid; IndexReader topics = Indexes.getReader(RepositoryDirs.TOPICS.getPath(lang)); int max = topics.maxDoc(); int[] map = new int[max_wid+1]; for(int i=0;i<map.length; i++) map[i]=-1; PLogger plog = new PLogger(log, Step.MINUTE) .setEnd(max) .start(); for(int i=0;i<max;i++) { map[Integer.parseInt(topics.document(i).get(TopicIndexer.FIELD_WID))] = i; plog.update(); } plog.stop(); return map; } }