python - 用于熵计算的numpy数组划分
问题描述
我正在尝试创建一个适用于 python 2 和 3 的 shannon_entropy 函数。以下代码适用于 python 3,但是,评估 norm_counts 的语句在 python 2 上返回一个等于 0 的 narray,并在 python 3 中正确返回。
我已经分解并简化了下面的代码:
import unittest
import numpy as np
def shannon_ent(labels, base=256):
value, counts = np.unique(labels, return_counts=True)
sum_counts = counts.sum()
norm_counts = counts / sum_counts
print(norm_counts)
base = e if base is None else base
logged_counts = np.log(norm_counts)
logged_base = np.log(base)
logged = logged_counts/logged_base
final = -(norm_counts * logged)
return final.sum()
class function_tests(unittest.TestCase):
def test_shannon_ent(self):
chunk = [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126]
ent = shannon_ent(chunk)
print('*** is: {}'.format(ent))
self.assertEqual(ent, 0.8212319510413685)
if __name__ == '__main__':
unittest.main()
给出以下输出:
蟒蛇2
# python unittest_binGraph.py
(array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1]), 95)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
E
======================================================================
ERROR: test_shannon_ent (__main__.function_tests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "unittest_binGraph.py", line 39, in test_shannon_ent
ent = shannon_ent(chunk)
File "unittest_binGraph.py", line 22, in shannon_ent
logged_counts = np.log(norm_counts)
FloatingPointError: divide by zero encountered in log
----------------------------------------------------------------------
Ran 1 test in 0.007s
FAILED (errors=1)
蟒蛇 3
# python unittest_binGraph.py
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 95
[0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632]
*** is: 0.8212319510413685
.
----------------------------------------------------------------------
Ran 1 test in 0.007s
OK
除非有人有更好的方法来计算熵?!我目前也在代码中使用 scripy 和统计模块。
解决方案
它可能是整数除法。添加
from __future__ import division
在文件的最顶部。
推荐阅读
- php - 如何使用新方法扩展 PDO 类并在另一个类中使用该方法?
- php - PHPUnit - 在“”中找不到类“/path/to/my/class.php”
- python - Pyomo Set 和 Python set 有什么区别吗?
- sql - 使用存储过程的返回值进行 SSIS 循环
- jquery - 使用变量构建选择选项
- matching - 记录与多个数据集的链接
- javascript - 如何修复我的代码以确定整个字符串是否按字母顺序排列?
- scala - 如何使用 scala 将具有列表值的案例类正确展平为另一个案例类
- amazon-web-services - 使用 Amazon Cognito 进行 Hasura Webhook 身份验证
- python - PGM 文件无法在 tkinter 窗口中打开,“无法识别图像文件中的数据”