return dict( [(itemset,frequency) for itemset,frequency in counts.items() if frequency>=min_support ] )
import sys
# Frequent itemsets grouped by size.
# Layout: {itemset length: {frozenset of movie ids: support count}}
frequent_itemsets = {}
min_support = 50  # support threshold used to decide what counts as "frequent"

# Seed the table with the single-movie itemsets. The comparison is strict, so
# a movie with exactly `min_support` favorable reviews is NOT included.
# (presumably num_favorable_by_movie is a pandas DataFrame indexed by movie id
# with a "Favorable" count column -- confirm against where it is built)
frequent_itemsets[1] = {
    frozenset((movie_id,)): row["Favorable"]
    for movie_id, row in num_favorable_by_movie.iterrows()
    if row["Favorable"] > min_support
}

# Bare expression kept from the original: it only has a visible effect when
# evaluated in an interactive session; in a plain script it does nothing.
frequent_itemsets[1]

# Report how many movies cleared the support threshold.
print(
    "there are {0} movie with more than {1} favorable reviews".format(
        len(frequent_itemsets[1]),
        min_support,
    )
)
# Confidence of every candidate rule:
#     correct / (correct + incorrect)
# The float() cast keeps this a true division even where `/` on two ints
# would truncate.
rule_confidence = {
    rule: correct_counts[rule]
    / float(correct_counts[rule] + incorrect_counts[rule])
    for rule in candidate_rules
}

# Lowest acceptable confidence; a rule must score strictly above it to be kept.
min_confidence = 0.9

# Drop every rule whose confidence does not exceed the threshold.
rule_confidence = {
    rule: score
    for rule, score in rule_confidence.items()
    if score > min_confidence
}

print(
    "the total of rules which bigger than min_confidence is {}".format(
        len(rule_confidence)
    )
)
# Confidence of every candidate rule measured on the test counts:
#     test_correct / (test_correct + test_incorrect)
# NOTE(review): if a rule has zero correct and zero incorrect test
# observations the denominator is 0.0 and this raises ZeroDivisionError --
# confirm whether that can happen for the test data and what value is wanted.
test_rule_confidence = {
    rule: test_correct_counts[rule]
    / float(test_correct_counts[rule] + test_incorrect_counts[rule])
    for rule in candidate_rules
}