Java Code Examples for com.jstarcraft.ai.data.DataModule#getQualityInner()

The following examples show how to use com.jstarcraft.ai.data.DataModule#getQualityInner(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SocialModel.java    From jstarcraft-rns with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the social model: runs the parent preparation, reads the social
 * regularization setting and fills {@code socialMatrix} from the module
 * registered in the data space under the name "social".
 *
 * @param configuration source of the regularization value and the coefficient field name
 * @param model         data module handed on to the parent preparation
 * @param space         data space that provides the "social" module
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    super.prepare(configuration, model, space);

    socialRegularization = configuration.getFloat("recommender.social.regularization", 0.01f);

    // The social relations come from the "social" module.
    // TODO should context.getSimilarity().getSimilarityMatrix() be used here instead?
    DataModule socialModel = space.getModule("social");

    // TODO needs refactoring: trusterDimension and trusteeDimension should be configurable.
    coefficientField = configuration.getString("data.model.fields.coefficient");
    // Truster and trustee are addressed as two consecutive inner quality
    // dimensions, starting at the inner index of the user field.
    trusterDimension = socialModel.getQualityInner(userField) + 0;
    trusteeDimension = socialModel.getQualityInner(userField) + 1;
    coefficientDimension = socialModel.getQuantityInner(coefficientField);

    // Collect one (truster, trustee) -> coefficient cell per instance.
    HashMatrix socialTable = new HashMatrix(true, userSize, userSize, new Long2FloatRBTreeMap());
    for (DataInstance relation : socialModel) {
        int trusterIndex = relation.getQualityFeature(trusterDimension);
        int trusteeIndex = relation.getQualityFeature(trusteeDimension);
        float coefficient = relation.getQuantityFeature(coefficientDimension);
        socialTable.setValue(trusterIndex, trusteeIndex, coefficient);
    }
    socialMatrix = SparseMatrix.valueOf(userSize, userSize, socialTable);
}
 
Example 2
Source File: RandomSeparator.java    From jstarcraft-rns with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a separator that distributes the instances of a data module between a
 * train reference and a test reference at random.
 * <p>
 * When {@code matchField} is {@code null} the whole module is handled as a single
 * group; otherwise the module is first split by the quality feature of that field.
 * For every referenced instance a float is drawn with
 * {@code RandomUtility.randomFloat(1F)}; the instance goes to the train reference
 * when the drawn value is below {@code random}, and to the test reference otherwise.
 *
 * @param space      data space used to look up the size of the match attribute
 * @param dataModule module whose instances are separated
 * @param matchField quality field to group by, or {@code null} for no grouping
 * @param random     threshold compared against each drawn float
 */
public RandomSeparator(DataSpace space, DataModule dataModule, String matchField, float random) {
    this.dataModule = dataModule;

    ReferenceModule[] groups;
    if (matchField != null) {
        int matchDimension = dataModule.getQualityInner(matchField);
        DataSplitter splitter = new QualityFeatureDataSplitter(matchDimension);
        int groupCount = space.getQualityAttribute(matchField).getSize();
        groups = splitter.split(dataModule, groupCount);
    } else {
        groups = new ReferenceModule[] { new ReferenceModule(dataModule) };
    }

    this.trainReference = new IntegerArray();
    this.testReference = new IntegerArray();
    for (ReferenceModule group : groups) {
        IntegerArray reference = group.getReference();
        int length = reference.getSize();
        for (int position = 0; position < length; position++) {
            // Draw first, then read the datum, matching the evaluation order callers rely on.
            boolean train = RandomUtility.randomFloat(1F) < random;
            if (train) {
                this.trainReference.associateData(reference.getData(position));
            } else {
                this.testReference.associateData(reference.getData(position));
            }
        }
    }
}
 
Example 3
Source File: MovieDataConfigurer.java    From jstarcraft-example with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the movie score data module.
 * <p>
 * Creates a dense module named "score" with the fields user, item, score and
 * instant, loads {@code data/ml-100k/u.data} (tab-delimited) into it, then
 * copies each instance's score feature into its quantity mark and records a
 * click of the item on the matching user.
 *
 * @param movieDataSpace data space that creates the module and supplies the
 *                       attributes handed to the CSV converter
 * @param movieUsers     users looked up by the user feature of each instance
 * @param movieItems     not read by this method
 * @return the populated data module
 * @throws Exception declared for failures while opening or converting the data file
 */
@Bean("movieDataModule")
DataModule getMovieDataModule(DataSpace movieDataSpace, List<MovieUser> movieUsers, List<MovieItem> movieItems) throws Exception {
    // Column position -> field name of the input file.
    TreeMap<Integer, String> fields = new TreeMap<>();
    fields.put(1, "user");
    fields.put(2, "item");
    fields.put(3, "score");
    fields.put(4, "instant");
    DataModule dataModule = movieDataSpace.makeDenseModule("score", fields, 1000000);

    File source = new File("data/ml-100k/u.data");
    CSVFormat tabSeparated = CSVFormat.DEFAULT.withDelimiter('\t');
    DataConverter<InputStream> convertor = new CsvConverter(tabSeparated, movieDataSpace.getQualityAttributes(), movieDataSpace.getQuantityAttributes());
    try (InputStream input = new FileInputStream(source)) {
        convertor.convert(dataModule, input);
    }

    int userDimension = dataModule.getQualityInner("user");
    int itemDimension = dataModule.getQualityInner("item");
    int scoreDimension = dataModule.getQuantityInner("score");
    for (DataInstance record : dataModule) {
        int userIndex = record.getQualityFeature(userDimension);
        int itemIndex = record.getQualityFeature(itemDimension);
        // The score feature doubles as the quantity mark of the instance.
        float score = record.getQuantityFeature(scoreDimension);
        record.setQuantityMark(score);
        movieUsers.get(userIndex).click(itemIndex);
    }

    return dataModule;
}
 
Example 4
Source File: DeepFMModel.java    From jstarcraft-rns with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the DeepFM model: runs the parent preparation, reads the learn rate,
 * momentum and weight regularization from the configuration, remembers the
 * module as {@code marker} and records the attribute size of every quality
 * dimension in {@code dimensionSizes}.
 *
 * @param configuration source of the iterator and regularization settings
 * @param model         data module whose quality dimensions are inspected
 * @param space         data space that provides the quality attribute sizes
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    super.prepare(configuration, model, space);
    learnRatio = configuration.getFloat("recommender.iterator.learnrate");
    momentum = configuration.getFloat("recommender.iterator.momentum");
    weightRegularization = configuration.getFloat("recommender.weight.regularization");
    this.marker = model;

    // TODO needs refactoring: mapping between outer indexes and inner indexes.
    int qualityOrder = model.getQualityOrder();
    dimensionSizes = new int[qualityOrder];
    for (int order = 0; order < qualityOrder; order++) {
        Entry<Integer, KeyValue<String, Boolean>> term = model.getOuterKeyValue(order);
        String field = term.getValue().getKey();
        // Sizes are stored by inner index, which need not equal the outer order.
        int innerIndex = model.getQualityInner(field);
        dimensionSizes[innerIndex] = space.getQualityAttribute(field).getSize();
    }
}
 
Example 5
Source File: AbstractModel.java    From jstarcraft-rns with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the state shared by the models: resolves the user and item fields,
 * their inner dimensions and attribute sizes, builds {@code scoreMatrix} from
 * the quantity marks of the module and derives the score statistics
 * (element count, boundary values and mean).
 *
 * @param configuration source of the user and item field names
 * @param model         data module holding the scored instances
 * @param space         data space that provides the attribute sizes
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    userField = configuration.getString("data.model.fields.user", "user");
    itemField = configuration.getString("data.model.fields.item", "item");

    userDimension = model.getQualityInner(userField);
    itemDimension = model.getQualityInner(itemField);
    userSize = space.getQualityAttribute(userField).getSize();
    itemSize = space.getQualityAttribute(itemField).getSize();

    // Split the module per user and sort each part.
    // NOTE(review): the sorted parts are not read again within this method.
    DataSplitter splitter = new QualityFeatureDataSplitter(userDimension);
    DataModule[] models = splitter.split(model, userSize);
    DataSorter sorter = new AllFeatureDataSorter();
    for (int userIndex = 0; userIndex < userSize; userIndex++) {
        DataModule sorted = sorter.sort(models[userIndex]);
        models[userIndex] = sorted;
    }

    // One (user, item) -> mark cell per instance.
    HashMatrix scoreTable = new HashMatrix(true, userSize, itemSize, new Long2FloatRBTreeMap());
    for (DataInstance instance : model) {
        scoreTable.setValue(instance.getQualityFeature(userDimension), instance.getQualityFeature(itemDimension), instance.getQuantityMark());
    }
    scoreMatrix = SparseMatrix.valueOf(userSize, itemSize, scoreTable);
    actionSize = scoreMatrix.getElementSize();

    KeyValue<Float, Float> boundary = scoreMatrix.getBoundary(false);
    minimumScore = boundary.getKey();
    maximumScore = boundary.getValue();

    // Mean score = sum of all stored scores divided by their count.
    meanScore = scoreMatrix.getSum(false);
    meanScore /= actionSize;
}
 
Example 6
Source File: DeepFMModel.java    From jstarcraft-rns with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the DeepFM model: runs the parent preparation, reads the learn rate,
 * momentum and weight regularization from the configuration, remembers the
 * module as {@code marker} and records the attribute size of every quality
 * dimension in {@code dimensionSizes}.
 *
 * @param configuration source of the iterator and regularization settings
 * @param model         data module whose quality dimensions are inspected
 * @param space         data space that provides the quality attribute sizes
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    super.prepare(configuration, model, space);
    learnRatio = configuration.getFloat("recommender.iterator.learnrate");
    momentum = configuration.getFloat("recommender.iterator.momentum");
    weightRegularization = configuration.getFloat("recommender.weight.regularization");
    this.marker = model;

    // TODO needs refactoring: mapping between outer indexes and inner indexes.
    int qualityOrder = model.getQualityOrder();
    dimensionSizes = new int[qualityOrder];
    for (int order = 0; order < qualityOrder; order++) {
        Entry<Integer, KeyValue<String, Boolean>> term = model.getOuterKeyValue(order);
        String field = term.getValue().getKey();
        // Sizes are stored by inner index, which need not equal the outer order.
        int innerIndex = model.getQualityInner(field);
        dimensionSizes[innerIndex] = space.getQualityAttribute(field).getSize();
    }
}
 
Example 7
Source File: RankGeoFMModel.java    From jstarcraft-rns with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the Rank-GeoFM model: runs the parent preparation, reads the ranking
 * and regularization settings, resolves the longitude/latitude dimensions of the
 * "location" module, initializes the factor matrices from {@code distribution},
 * collects the location of every item, computes the neighbor weights and fills
 * the vector {@code E}.
 *
 * @param configuration source of the hyper-parameters and field names
 * @param model         data module handed on to the parent preparation
 * @param space         data space that provides the "location" module
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    super.prepare(configuration, model, space);

    // Hyper-parameters; the second argument is the fallback handed to the configurator.
    margin = configuration.getFloat("recommender.ranking.margin", 0.3F);
    radius = configuration.getFloat("recommender.regularization.radius", 1F);
    balance = configuration.getFloat("recommender.regularization.balance", 0.2F);
    knn = configuration.getInteger("recommender.item.nearest.neighbour.number", 300);

    longitudeField = configuration.getString("data.model.fields.longitude");
    latitudeField = configuration.getString("data.model.fields.latitude");

    DataModule locationModel = space.getModule("location");
    longitudeDimension = locationModel.getQuantityInner(longitudeField);
    latitudeDimension = locationModel.getQuantityInner(latitudeField);

    geoInfluences = DenseMatrix.valueOf(itemSize, factorSize);

    // Every element of the factor matrices is drawn from the shared distribution,
    // in this order: explicit user factors, implicit user factors, item factors.
    explicitUserFactors = DenseMatrix.valueOf(userSize, factorSize);
    explicitUserFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    implicitUserFactors = DenseMatrix.valueOf(userSize, factorSize);
    implicitUserFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    itemFactors = DenseMatrix.valueOf(itemSize, factorSize);
    itemFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });

    // Location (longitude, latitude) of each item, indexed by item.
    itemLocations = new Float2FloatKeyValue[itemSize];
    int itemDimension = locationModel.getQualityInner(itemField);
    for (DataInstance location : locationModel) {
        int itemIndex = location.getQualityFeature(itemDimension);
        float longitude = location.getQuantityFeature(longitudeDimension);
        float latitude = location.getQuantityFeature(latitudeDimension);
        itemLocations[itemIndex] = new Float2FloatKeyValue(longitude, latitude);
    }
    calculateNeighborWeightMatrix(knn);

    // E[k] = 1 + 1/2 + ... + 1/k for k in 1..itemSize; index 0 is left untouched.
    E = DenseVector.valueOf(itemSize + 1);
    E.setValue(1, 1F);
    for (int rank = 2; rank <= itemSize; rank++) {
        float previous = E.getValue(rank - 1);
        E.setValue(rank, previous + 1F / rank);
    }

    // NOTE(review): geoInfluences is allocated a second time here; confirm whether
    // calculateNeighborWeightMatrix depends on the first allocation before removing either.
    geoInfluences = DenseMatrix.valueOf(itemSize, factorSize);
}
 
Example 8
Source File: TopicMFMTModel.java    From jstarcraft-rns with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the TopicMF-MT model: runs the parent preparation, reads the
 * hyper-parameters, builds the document-word matrix {@code W} from the comment
 * field of every instance, initializes biases and factors from
 * {@code distribution}, and initializes the document and word factors.
 * <p>
 * A comment is a string of word identifiers separated by {@code ":"}; each
 * occurrence of a word adds {@code 1 / (number of words in the comment)} to
 * the cell of that document and word.
 *
 * @param configuration source of the hyper-parameters and the comment field name
 * @param model         data module whose instances carry user, item and comment features
 * @param space         data space that provides the comment attribute
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    super.prepare(configuration, model, space);

    commentField = configuration.getString("data.model.fields.comment");
    commentDimension = model.getQualityInner(commentField);
    MemoryQualityAttribute commentAttribute = (MemoryQualityAttribute) space.getQualityAttribute(commentField);
    Object[] documentValues = commentAttribute.getDatas();

    // Hyper-parameters.
    lambda = configuration.getFloat("recommender.regularization.lambda", 0.001F);
    lambdaU = configuration.getFloat("recommender.regularization.lambdaU", 0.001F);
    lambdaV = configuration.getFloat("recommender.regularization.lambdaV", 0.001F);
    lambdaB = configuration.getFloat("recommender.regularization.lambdaB", 0.001F);
    numberOfTopics = configuration.getInteger("recommender.topic.number", 10);
    learnRatio = configuration.getFloat("recommender.iterator.learnrate", 0.01F);
    epocheSize = configuration.getInteger("recommender.iterator.maximum", 10);

    numberOfDocuments = scoreMatrix.getElementSize();

    // Count the words, build the word dictionary and the
    // (user, item) -> document lookup.
    Map<String, Integer> wordDictionaries = new HashMap<>();
    Table<Integer, Integer, Float> documentTable = HashBasedTable.create();
    userItemToDocument = HashBasedTable.create();
    int row = 0;
    for (DataInstance sample : model) {
        int userIndex = sample.getQualityFeature(userDimension);
        int itemIndex = sample.getQualityFeature(itemDimension);
        int documentIndex = sample.getQualityFeature(commentDimension);
        userItemToDocument.put(userIndex, itemIndex, row);

        // Convert word identifiers into word indexes.
        String data = (String) documentValues[documentIndex];
        String[] words;
        if (data.isEmpty()) {
            words = new String[0];
        } else {
            words = data.split(":");
        }
        // Contribution of a single occurrence; unused when the comment has no words.
        float share = 1F / words.length;
        for (String word : words) {
            if (!wordDictionaries.containsKey(word)) {
                wordDictionaries.put(word, numberOfWords++);
            }
            int wordIndex = wordDictionaries.get(word);
            Float current = documentTable.get(row, wordIndex);
            float updated = (current == null ? 0F : current) + share;
            documentTable.put(row, wordIndex, updated);
        }
        row++;
    }
    // Build W.
    W = SparseMatrix.valueOf(numberOfDocuments, numberOfWords, documentTable);

    // Biases and factors are sampled element by element, in this order.
    userBiases = DenseVector.valueOf(userSize);
    userBiases.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    itemBiases = DenseVector.valueOf(itemSize);
    itemBiases.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    userFactors = DenseMatrix.valueOf(userSize, numberOfTopics);
    userFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    itemFactors = DenseMatrix.valueOf(itemSize, numberOfTopics);
    itemFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    K = initStd;

    topicVector = DenseVector.valueOf(numberOfTopics);
    function = new SoftMaxActivationFunction();

    // Initialize theta and phi.
    // TODO theta is actually documentFactors.
    documentFactors = DenseMatrix.valueOf(numberOfDocuments, numberOfTopics);
    calculateTheta();
    // TODO phi is actually wordFactors.
    wordFactors = DenseMatrix.valueOf(numberOfTopics, numberOfWords);
    wordFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(RandomUtility.randomFloat(0.01F));
    });

    logger.info("number of users : " + userSize);
    logger.info("number of Items : " + itemSize);
    logger.info("number of words : " + wordDictionaries.size());
}
 
Example 9
Source File: TopicMFATModel.java    From jstarcraft-rns with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the TopicMF-AT model: runs the parent preparation, reads the
 * hyper-parameters, builds the document-word matrix {@code W} from the comment
 * field of every instance, initializes biases and factors from
 * {@code distribution}, and initializes the document and word factors.
 * <p>
 * A comment is a string of word identifiers separated by {@code ":"}; each
 * occurrence of a word adds {@code 1 / (number of words in the comment)} to
 * the cell of that document and word.
 *
 * @param configuration source of the hyper-parameters and the comment field name
 * @param model         data module whose instances carry user, item and comment features
 * @param space         data space that provides the comment attribute
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    super.prepare(configuration, model, space);

    commentField = configuration.getString("data.model.fields.comment");
    commentDimension = model.getQualityInner(commentField);
    MemoryQualityAttribute commentAttribute = (MemoryQualityAttribute) space.getQualityAttribute(commentField);
    Object[] documentValues = commentAttribute.getDatas();

    // Hyper-parameters.
    lambda = configuration.getFloat("recommender.regularization.lambda", 0.001F);
    lambdaU = configuration.getFloat("recommender.regularization.lambdaU", 0.001F);
    lambdaV = configuration.getFloat("recommender.regularization.lambdaV", 0.001F);
    lambdaB = configuration.getFloat("recommender.regularization.lambdaB", 0.001F);
    numberOfTopics = configuration.getInteger("recommender.topic.number", 10);
    learnRatio = configuration.getFloat("recommender.iterator.learnrate", 0.01F);
    epocheSize = configuration.getInteger("recommender.iterator.maximum", 10);

    numberOfDocuments = scoreMatrix.getElementSize();

    // Count the words, build the word dictionary and the
    // (user, item) -> document lookup.
    Map<String, Integer> wordDictionaries = new HashMap<>();
    Table<Integer, Integer, Float> documentTable = HashBasedTable.create();
    userItemToDocument = HashBasedTable.create();
    // TODO rename rowCount to documentIndex?
    int rowCount = 0;
    for (DataInstance sample : model) {
        int userIndex = sample.getQualityFeature(userDimension);
        int itemIndex = sample.getQualityFeature(itemDimension);
        int documentIndex = sample.getQualityFeature(commentDimension);
        userItemToDocument.put(userIndex, itemIndex, rowCount);

        // Convert word identifiers into word indexes.
        String data = (String) documentValues[documentIndex];
        String[] words;
        if (data.isEmpty()) {
            words = new String[0];
        } else {
            words = data.split(":");
        }
        // Contribution of a single occurrence; unused when the comment has no words.
        float share = 1F / words.length;
        for (String word : words) {
            if (!wordDictionaries.containsKey(word)) {
                wordDictionaries.put(word, numberOfWords++);
            }
            int wordIndex = wordDictionaries.get(word);
            Float current = documentTable.get(rowCount, wordIndex);
            float updated = (current == null ? 0F : current) + share;
            documentTable.put(rowCount, wordIndex, updated);
        }
        rowCount++;
    }
    // Build W.
    W = SparseMatrix.valueOf(numberOfDocuments, numberOfWords, documentTable);

    // Biases and factors are sampled element by element, in this order.
    userBiases = DenseVector.valueOf(userSize);
    userBiases.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    itemBiases = DenseVector.valueOf(itemSize);
    itemBiases.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    userFactors = DenseMatrix.valueOf(userSize, numberOfTopics);
    userFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });
    itemFactors = DenseMatrix.valueOf(itemSize, numberOfTopics);
    itemFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(distribution.sample().floatValue());
    });

    K1 = initStd;
    K2 = initStd;

    topicVector = DenseVector.valueOf(numberOfTopics);
    function = new SoftMaxActivationFunction();

    // Initialize theta and phi.
    // TODO theta is actually documentFactors.
    documentFactors = DenseMatrix.valueOf(numberOfDocuments, numberOfTopics);
    calculateTheta();
    // TODO phi is actually wordFactors.
    wordFactors = DenseMatrix.valueOf(numberOfTopics, numberOfWords);
    wordFactors.iterateElement(MathCalculator.SERIAL, element -> {
        element.setValue(RandomUtility.randomFloat(0.01F));
    });

    logger.info("number of users : " + userSize);
    logger.info("number of Items : " + itemSize);
    logger.info("number of words : " + wordDictionaries.size());
}
 
Example 10
Source File: VBPRModel.java    From jstarcraft-rns with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the VBPR model: runs the parent preparation, sets every stored score
 * to 1, reads the hyper-parameters, initializes biases, features and factors
 * from {@code distribution}, and loads the item features from the "article"
 * module into {@code featureTable}, rescaled with min-max normalization.
 *
 * @param configuration source of the hyper-parameters and the article/feature/degree field names
 * @param model         data module handed on to the parent preparation
 * @param space         data space that provides the "article" module
 */
@Override
public void prepare(Configurator configuration, DataModule model, DataSpace space) {
    super.prepare(configuration, model, space);

    // TODO this code could be removed (use a constant marker or binarize.threshold instead).
    for (MatrixScalar term : scoreMatrix) {
        term.setValue(1F);
    }

    biasRegularization = configuration.getFloat("recommender.bias.regularization", 0.1F);
    // TODO these should come from configuration or be computed dynamically.
    numberOfFeatures = 4096;
    featureRegularization = 1000;
    sampleRatio = configuration.getInteger("recommender.vbpr.alpha", 5);

    itemBiases = DenseVector.valueOf(itemSize);
    itemBiases.iterateElement(MathCalculator.SERIAL, (scalar) -> {
        scalar.setValue(distribution.sample().floatValue());
    });

    itemFeatures = DenseVector.valueOf(numberOfFeatures);
    itemFeatures.iterateElement(MathCalculator.SERIAL, (scalar) -> {
        scalar.setValue(distribution.sample().floatValue());
    });

    userFeatures = DenseMatrix.valueOf(userSize, factorSize);
    userFeatures.iterateElement(MathCalculator.SERIAL, (scalar) -> {
        scalar.setValue(distribution.sample().floatValue());
    });

    featureFactors = DenseMatrix.valueOf(factorSize, numberOfFeatures);
    featureFactors.iterateElement(MathCalculator.SERIAL, (scalar) -> {
        scalar.setValue(distribution.sample().floatValue());
    });

    // Running extremes of the feature values. The maximum must start at the most
    // negative finite float: Float.MIN_VALUE is the smallest POSITIVE float, so
    // seeding with it would report a wrong maximum whenever all values are <= 0.
    float minimumValue = Float.MAX_VALUE;
    float maximumValue = -Float.MAX_VALUE;
    featureTable = new HashMatrix(true, itemSize, numberOfFeatures, new Long2FloatRBTreeMap());
    DataModule featureModel = space.getModule("article");
    String articleField = configuration.getString("data.model.fields.article");
    String featureField = configuration.getString("data.model.fields.feature");
    String degreeField = configuration.getString("data.model.fields.degree");
    int articleDimension = featureModel.getQualityInner(articleField);
    int featureDimension = featureModel.getQualityInner(featureField);
    int degreeDimension = featureModel.getQuantityInner(degreeField);
    for (DataInstance instance : featureModel) {
        int itemIndex = instance.getQualityFeature(articleDimension);
        int featureIndex = instance.getQualityFeature(featureDimension);
        float featureValue = instance.getQuantityFeature(degreeDimension);
        if (featureValue < minimumValue) {
            minimumValue = featureValue;
        }
        if (featureValue > maximumValue) {
            maximumValue = featureValue;
        }
        featureTable.setValue(itemIndex, featureIndex, featureValue);
    }
    // Min-max normalization of every stored feature value.
    // NOTE(review): when all values are equal the range is 0 and the result is NaN;
    // confirm the intended value for that case.
    for (MatrixScalar cell : featureTable) {
        float value = (cell.getValue() - minimumValue) / (maximumValue - minimumValue);
        featureTable.setValue(cell.getRow(), cell.getColumn(), value);
    }
    factorMatrix = DenseMatrix.valueOf(factorSize, itemSize);
    factorMatrix.iterateElement(MathCalculator.SERIAL, (scalar) -> {
        scalar.setValue(distribution.sample().floatValue());
    });
}