首页 > 解决方案 > 用于熵计算的numpy数组划分

问题描述

我正在尝试创建一个适用于 python 2 和 3 的 shannon_entropy 函数。以下代码适用于 python 3,但是,评估 norm_counts 的语句在 python 2 上返回一个等于 0 的 narray,并在 python 3 中正确返回。

我已经分解并简化了下面的代码:

import unittest   

import numpy as np

def shannon_ent(labels, base=256):

    value, counts = np.unique(labels, return_counts=True)    
    sum_counts = counts.sum()

    norm_counts = counts / sum_counts

    print(norm_counts)

    base = e if base is None else base

    logged_counts = np.log(norm_counts)
    logged_base = np.log(base)
    logged = logged_counts/logged_base
    final = -(norm_counts * logged)

    return final.sum()


class function_tests(unittest.TestCase):

    def test_shannon_ent(self):

        chunk = [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126]
        ent = shannon_ent(chunk)
        print('*** is: {}'.format(ent))

        self.assertEqual(ent, 0.8212319510413685)

if __name__ == '__main__':
    unittest.main()

给出以下输出:

蟒蛇2

# python unittest_binGraph.py 
(array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1]), 95)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
E
======================================================================
ERROR: test_shannon_ent (__main__.function_tests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "unittest_binGraph.py", line 39, in test_shannon_ent
    ent = shannon_ent(chunk)
  File "unittest_binGraph.py", line 22, in shannon_ent
    logged_counts = np.log(norm_counts)
FloatingPointError: divide by zero encountered in log

----------------------------------------------------------------------
Ran 1 test in 0.007s

FAILED (errors=1)

蟒蛇 3

# python unittest_binGraph.py 
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 95
[0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632]
*** is: 0.8212319510413685
.
----------------------------------------------------------------------
Ran 1 test in 0.007s

OK

除非有人有更好的方法来计算熵?!我目前也在代码中使用 scripy 和统计模块。

标签: pythonnumpyentropy

解决方案


它可能是整数除法。添加

from __future__ import division

在文件的最顶部。


推荐阅读