#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Build "consumption rank within faculty" features for each student.

Pipeline (see ``main``):

1. load a per-student score from ``forYance.txt``;
2. group student ids by faculty from the score files;
3. compute each student's average POS consumption per active day from the
   inverted card-record files;
4. rank students inside each faculty by that average (ascending) and
   normalise the rank to (0, 1];
5. write one feature row per student listed in the subsidy files.

The code is written so that it runs unchanged on Python 2.7 and Python 3.
Input files are opened with the platform default encoding, as before.
"""
import time
import datetime
import numpy as np

# Card-record category whose amounts are summed as "consumption".
POS_CONSUME_CATEGORY = "POS消费"

# Placeholder written when a rank or a rank*score product is unavailable.
MISSING_VALUE = -999

# Default destinations of the feature rows (relative to the working dir).
TRAIN_OUTPUT_PATH = "../original_data/rank_feature_train.txt"
TEST_OUTPUT_PATH = "../original_data/rank_feature_test.txt"

# Faculty ids that are ranked; they appear as strings "1".."19" in the data.
FACULTY_IDS = range(1, 20)


def extract_consume_per_person(file_name, consume_dict):
    """Fill ``consume_dict`` with average POS consumption per active day.

    Each input line has the form ``id$record$record...``. A record is a
    comma-separated list of (optionally double-quoted) columns, of which
    column 0 is the category, column 3 is ``"<date> <time>"`` and column 4
    is the amount.

    For every student the amounts of the ``POS_CONSUME_CATEGORY`` records
    are summed and divided by the number of distinct dates seen in *any*
    record of that student.

    Args:
        file_name: path of the inverted card-record file.
        consume_dict: dict updated in place, ``id -> average amount``.
            Existing entries for the same id are overwritten.

    Lines that carry no records (blank lines, or an id with nothing after
    it) are skipped: there is no active day to average over, and dividing
    by zero used to abort the whole run.
    """
    with open(file_name) as card_file:
        for line in card_file:
            fields = line.strip("\r\n").split("$")
            stu_id = fields[0]
            total_amount = 0.0
            active_dates = set()
            for record in fields[1:]:
                if not record:
                    # A trailing or doubled "$" yields an empty record.
                    continue
                columns = record.split(",")
                category = columns[0].strip('"')
                amount = float(columns[4].strip('"'))
                timestamp = columns[3].strip('"')
                # Keep only the date part of "<date> <time>".
                active_dates.add(timestamp.split(" ")[0])
                if category == POS_CONSUME_CATEGORY:
                    total_amount += amount
            if not active_dates:
                continue
            consume_dict[stu_id] = total_amount / len(active_dates)


def read_faculty_dict(file_name, faculty_dict):
    """Group student ids by faculty.

    Each non-blank input line is comma-separated with the student id in
    column 0 and the faculty in column 1; further columns are ignored.

    Args:
        file_name: path of the score file.
        faculty_dict: dict updated in place, ``faculty -> [id, ...]``.
            Ids are appended in file order, so calling this for several
            files accumulates students per faculty.
    """
    with open(file_name) as score_file:
        for line in score_file:
            if not line.strip():
                continue
            fields = line.strip("\n").split(",")
            stu_id = fields[0]
            faculty = fields[1]
            faculty_dict.setdefault(faculty, []).append(stu_id)


def extract_rank_feature(file_name, final_rank, score_dict, if_train,
                         output_path=None):
    """Write one feature row per student listed in ``file_name``.

    Each row looks like::

        {"stuId": <id>, "rank_in_faculty":<rank>,"rank_score_consume":<x>} 

    where ``<rank>`` is ``final_rank[id]`` and ``<x>`` is
    ``final_rank[id] * score_dict[id]``. When the id is missing from
    ``final_rank`` the rank is ``MISSING_VALUE``; when it is missing from
    either dict the product is ``MISSING_VALUE``.

    Args:
        file_name: path of the student list. In train mode each line is
            comma-separated and the id is the first column; otherwise the
            whole stripped line is the id.
        final_rank: dict ``id -> normalised rank``.
        score_dict: dict ``id -> score``.
        if_train: selects the input format and the default output file.
        output_path: where to write the rows. Defaults to
            ``TRAIN_OUTPUT_PATH`` or ``TEST_OUTPUT_PATH`` depending on
            ``if_train``.

    Every processed id is also printed to stdout. Blank input lines are
    skipped instead of producing a row with an empty ``stuId``.
    """
    if output_path is None:
        output_path = TRAIN_OUTPUT_PATH if if_train else TEST_OUTPUT_PATH

    # The input is opened first so that a missing input file does not
    # truncate an existing output file.
    with open(file_name) as id_file, open(output_path, 'w') as out:
        for line in id_file:
            stripped = line.strip()
            if not stripped:
                continue
            stu_id = stripped.split(",")[0] if if_train else stripped
            print(stu_id)

            rank = final_rank.get(stu_id, MISSING_VALUE)
            if stu_id in score_dict and stu_id in final_rank:
                rank_score = final_rank[stu_id] * score_dict[stu_id]
            else:
                rank_score = MISSING_VALUE

            out.write("".join([
                "{",
                '"stuId": ', stu_id, ", ",
                '"rank_in_faculty":', str(rank), ",",
                '"rank_score_consume":', str(rank_score), "} \n",
            ]))


def read_score_dict(file_name, score_dict):
    """Load ``id -> score`` pairs.

    Each non-blank input line is comma-separated with the student id in
    column 0 and a number in column 1.

    Args:
        file_name: path of the score file.
        score_dict: dict updated in place, ``id -> float(column 1)``.
    """
    with open(file_name) as score_file:
        for line in score_file:
            if not line.strip():
                continue
            fields = line.strip("\n").split(",")
            score_dict[fields[0]] = float(fields[1])


def rank_within_faculty(student_ids, consume_dict):
    """Rank the given students by average consumption, lowest first.

    Args:
        student_ids: iterable of student ids belonging to one faculty.
        consume_dict: dict ``id -> average consumption``.

    Returns:
        A list of ``(id, consumption, rank)`` tuples in ascending order of
        consumption. ``rank`` is the 1-based position divided by the
        number of ranked students, so it lies in (0, 1]. Students without
        an entry in ``consume_dict`` are left out; an empty list is
        returned when nobody can be ranked.
    """
    # A dict is used so that an id listed twice is ranked only once.
    consumption = {}
    for stu_id in student_ids:
        if stu_id in consume_dict:
            consumption[stu_id] = consume_dict[stu_id]

    ordered = sorted(consumption.items(), key=lambda item: item[1])
    ranked_count = len(ordered)
    return [
        (stu_id, amount, float(position) / ranked_count)
        for position, (stu_id, amount) in enumerate(ordered, 1)
    ]


def main():
    """Run the whole feature-extraction pipeline on the project files."""
    final_rank = {}
    faculty_dict = {}
    score_dict = {}

    read_score_dict("../original_data/forYance.txt", score_dict)
    read_faculty_dict("../original_data/raw_data/test/score_final_test.txt", faculty_dict)
    read_faculty_dict("../original_data/raw_data/train/score_train.txt", faculty_dict)
    read_faculty_dict("../original_data/raw_data/old_test/score_test.txt", faculty_dict)
    print(faculty_dict)

    consume_dict = {}
    extract_consume_per_person("../studentForm/test/card_final_test_inverted_cleaned.txt", consume_dict)
    extract_consume_per_person("../studentForm/train/card_train_inverted_cleaned.txt", consume_dict)
    extract_consume_per_person("../studentForm/test_old/card_test_inverted_cleaned.txt", consume_dict)
    print(consume_dict)

    for faculty_id in FACULTY_IDS:
        # A faculty that does not occur in any score file simply has no
        # students to rank; it must not abort the run with a KeyError.
        students = faculty_dict.get(str(faculty_id), [])
        for stu_id, amount, rank in rank_within_faculty(students, consume_dict):
            print(str(stu_id) + " " + str(amount))
            final_rank[stu_id] = rank

    extract_rank_feature("../original_data/raw_data/test/subsidy_final_test.txt", final_rank, score_dict, False)
    extract_rank_feature("../original_data/raw_data/train/subsidy_train.txt", final_rank, score_dict, True)


if __name__ == "__main__":
    main()