algorithm - 最大分区
问题描述
给定一个整数n和 2 个实数序列 { a_1 , ..., a_n } 和 { b_1 , ..., b_n },对于所有i , a_i , b_i > 0 。对于给定的固定m < n让 { P_1 , ..., P_m } 是集合 {1, ..., n } 的一个分区,如P_1 U ... U P_n = {1, ..., n },与P_i的成对不相交(空相交)。我希望找到一个大小为m的分区来最大化表达式
把 n 元集合分成 m 块的分区数是第二类斯特林数 S(n, m)(而不是"n 选 m"),用蛮力枚举大得令人望而却步。是否有更好的迭代或近似解决方案?
为了深入了解这个问题,最后的代码块通过蛮力解决。对于实际尺寸问题(n ~ 1e6,k ~ 20),它不能按原样使用,但很容易被分发。
编辑:按 a^2/b 的值对 a、b 进行预排序后,最优分区的索引总是递增的(注意:下面的代码实际按 a/b 排序,两者排序结果并不相同,需要核实):
a = rng.uniform(low=0.0, high=10.0, size=NUM_POINTS)
b = rng.uniform(low=0.0, high=10.0, size=NUM_POINTS)
ind = np.argsort(a/b)
(a,b) = (seq[ind] for seq in (a,b))
一个样本运行
NUM_POINTS = 16
PARTITION_SIZE = 3
给出一个最优划分
[[0, 1, 2, 3, 4, 5, 6, 7], [8, 9], [10, 11]]
这在索引上是单调的。我想我可以证明这一点。如果成立,蛮力搜索就可以从枚举所有集合分区改进为只枚举 C(n-1, k-1) 种连续切分——仍然很多,但能大幅节省计算量。
import numpy as np
import multiprocessing
import concurrent.futures
from functools import partial
from itertools import islice
rng = np.random.RandomState(55)
def knuth_partition(ns, m):
    """Yield all partitions of the sequence ``ns`` into exactly ``m`` blocks.

    This is a transcription of Knuth's Algorithm U (TAOCP Vol. 4A,
    sec. 7.2.1.5, "generating all set partitions"): ``a[1..n]`` is a
    restricted-growth string in which ``a[j]`` is the block index of the
    j-th element, and the mutually recursive generators ``f`` (forward)
    and ``b`` (backward) enumerate every string that uses exactly ``m``
    distinct block labels.

    Each yielded value is a fresh list of ``m`` lists built by ``visit``,
    so callers may keep earlier partitions while the generator advances
    (the shared array ``a`` is mutated in place, but never exposed).
    """
    def visit(n, a):
        # Snapshot the current assignment a[1..n] into m new block lists.
        ps = [[] for i in range(m)]
        for j in range(n):
            ps[a[j + 1]].append(ns[j])
        return ps

    def f(mu, nu, sigma, n, a):
        # Forward enumeration over positions nu..n using mu blocks;
        # sigma is the parity bit that steers the zig-zag visiting order.
        if mu == 2:
            yield visit(n, a)
        else:
            for v in f(mu - 1, nu - 1, (mu + sigma) % 2, n, a):
                yield v
        if nu == mu + 1:
            a[mu] = mu - 1
            yield visit(n, a)
            # Walk a[nu] down to 0, visiting each assignment.
            while a[nu] > 0:
                a[nu] = a[nu] - 1
                yield visit(n, a)
        elif nu > mu + 1:
            if (mu + sigma) % 2 == 1:
                a[nu - 1] = mu - 1
            else:
                a[mu] = mu - 1
            # Recurse forward or backward depending on the current parity.
            if (a[nu] + sigma) % 2 == 1:
                for v in b(mu, nu - 1, 0, n, a):
                    yield v
            else:
                for v in f(mu, nu - 1, 0, n, a):
                    yield v
            while a[nu] > 0:
                a[nu] = a[nu] - 1
                if (a[nu] + sigma) % 2 == 1:
                    for v in b(mu, nu - 1, 0, n, a):
                        yield v
                else:
                    for v in f(mu, nu - 1, 0, n, a):
                        yield v

    def b(mu, nu, sigma, n, a):
        # Backward (mirror-image) enumeration: a[nu] is walked upward
        # instead of downward, undoing what f set on the way out.
        if nu == mu + 1:
            while a[nu] < mu - 1:
                yield visit(n, a)
                a[nu] = a[nu] + 1
            yield visit(n, a)
            a[mu] = 0
        elif nu > mu + 1:
            if (a[nu] + sigma) % 2 == 1:
                for v in f(mu, nu - 1, 0, n, a):
                    yield v
            else:
                for v in b(mu, nu - 1, 0, n, a):
                    yield v
            while a[nu] < mu - 1:
                a[nu] = a[nu] + 1
                if (a[nu] + sigma) % 2 == 1:
                    for v in f(mu, nu - 1, 0, n, a):
                        yield v
                else:
                    for v in b(mu, nu - 1, 0, n, a):
                        yield v
            # Reset the label that the forward pass installed.
            if (mu + sigma) % 2 == 1:
                a[nu - 1] = 0
            else:
                a[mu] = 0
        if mu == 2:
            yield visit(n, a)
        else:
            for v in b(mu - 1, nu - 1, (mu + sigma) % 2, n, a):
                yield v

    n = len(ns)
    a = [0] * (n + 1)
    # Seed string: the last m elements get distinct labels 0..m-1 so all
    # m blocks are non-empty; everything else starts in block 0.
    for j in range(1, m + 1):
        a[n - m + j] = j - 1
    return f(m, n, 0, n, a)
def Bell_n_k(n, k):
    '''Number of partitions of {1,...,n} into k non-empty subsets.

    This is the Stirling number of the second kind S(n, k); the name is
    kept for backward compatibility (a "restricted Bell number").

    The original version recursed on S(n,k) = k*S(n-1,k) + S(n-1,k-1)
    without memoization, recomputing subproblems exponentially.  This
    rewrite evaluates the same recurrence bottom-up in O(n*k) time.

    Returns 0 for the degenerate cases (n == 0, k == 0, or k > n).
    '''
    if n == 0 or k == 0 or k > n:
        return 0
    if k == 1 or k == n:
        return 1
    # One DP row per value of n; prev[j] holds S(i-1, j).
    prev = [1] + [0] * k          # row for n = 0: S(0,0)=1, S(0,j>0)=0
    for i in range(1, n + 1):
        cur = [0] * (k + 1)       # S(i, 0) = 0 for i >= 1
        for j in range(1, min(i, k) + 1):
            cur[j] = j * prev[j] + prev[j - 1]
        prev = cur
    return prev[k]
# ---- Problem size and worker configuration ----
NUM_POINTS = 13
PARTITION_SIZE = 4
NUM_WORKERS = multiprocessing.cpu_count()
INT_LIST = range(0, NUM_POINTS)
REPORT_EACH = 10000

partitions = knuth_partition(INT_LIST, PARTITION_SIZE)

# Theoretical number of partitions (a Stirling number), used for an
# accurate division of labor across workers.
num_partitions = Bell_n_k(NUM_POINTS, PARTITION_SIZE)

# Bucket boundaries for the worker slices.  Guard the step with
# max(1, ...): the original int(num_partitions/NUM_WORKERS) yields 0 when
# num_partitions < NUM_WORKERS (range() then raises ValueError), and the
# float division also loses precision for very large partition counts.
step = max(1, num_partitions // NUM_WORKERS)
bin_ends = list(range(0, num_partitions, step))
if not bin_ends or bin_ends[-1] != num_partitions:
    bin_ends.append(num_partitions)
islice_on = list(zip(bin_ends[:-1], bin_ends[1:]))

# Have to consume it; can't split work on a generator.
partitions = list(partitions)
rng.shuffle(partitions)
slices = [list(islice(partitions, *ind)) for ind in islice_on]
return_values = [None] * len(slices)
futures = [None] * len(slices)

# Random positive data, pre-sorted by a/b so the conjectured optimal
# partition is contiguous in the sorted order.
a = rng.uniform(low=0.0, high=10.0, size=NUM_POINTS)
b = rng.uniform(low=0.0, high=10.0, size=NUM_POINTS)
ind = np.argsort(a/b)
(a, b) = (seq[ind] for seq in (a, b))
def start_task():
    """Announce which worker process is spinning up."""
    worker = multiprocessing.current_process().name
    print('Starting ', worker)
def _task(a, b, partitions, report_each=REPORT_EACH):
    """Scan the given candidate partitions and return the best one.

    Returns a tuple ``(best score, best partition)`` where the score of a
    partition is the sum over its blocks p of sum(a[p])**2 / sum(b[p]).
    Progress is printed every ``report_each`` partitions.
    """
    best_score = float('-inf')
    best_part = -1
    for idx, part in enumerate(partitions):
        score = sum(sum(a[blk]) ** 2 / sum(b[blk]) for blk in part)
        if score > best_score:
            best_score, best_part = score, part
        if idx % report_each == 0:
            # NOTE(review): reads module globals `slices` and
            # `num_partitions` to estimate overall progress.
            pct = 100 * len(slices) * idx / num_partitions
            print('Percent complete: {:.{prec}f}'.
                  format(pct, prec=2))
    return (best_score, best_part)
def reduce(return_values):
    """Combine per-worker results: keep the (score, partition) pair with
    the largest score (first one wins on ties)."""
    def _score(pair):
        return pair[0]
    return max(return_values, key=_score)
task = partial(_task, a, b)
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Submit every slice first, THEN wait for results.  The original
    # called .result() inside the submit loop, which blocked on each
    # future before submitting the next and serialized the whole pool.
    # (Also renamed the loop variable: `slice` shadowed the builtin.)
    for ind, part_slice in enumerate(slices):
        futures[ind] = executor.submit(task, part_slice)
    for ind, fut in enumerate(futures):
        return_values[ind] = fut.result()
# NOTE(review): the combined best (score, partition) is computed but
# discarded here, as in the original; print or store it if needed.
reduce(return_values)
解决方案
我试图简单地用示例输入重新表述问题,如果我遗漏了什么,请告诉我。
A = [1, 3, 2, 1, 4],B = [2, 1, 5, 3, 1],n = len(A) = len(B) = 5
我们有两个带有正整数的列表。
我们需要找到一组索引 S(N = {1,2,3,..n} 的子集),假设它是 {2,3,5}。现在,我们得到一个新集合 S' = N - S = {1, 4}
对于 S 和 S',(sum(A[S]))^2/(sum(B[S'])) 需要最大化。
正如您所说,近似解决方案也将起作用。我们可以使用的启发式方法之一是我们需要选择这样的 S,以便 A 列表的值高而 B 列表的值低。
当我们在 A 的子集上取总和的平方时,让我们对 A 进行排序并选择一个子列表,以便我们获得最高分数。
import numpy as np

A = np.array([1, 2, 3, 4, 1, 2, 3])
B = np.array([3, 3, 1, 2, 1, 3, 1])

# Sort indices by A; other keys (e.g. A[k]/B[k]) can be tried too.
sorted_idx = sorted(range(len(A)), key=lambda k: A[k])
A_p = A[sorted_idx]
B_p = B[sorted_idx]

max_s = 0
part_ans = -1
# Try every split point i of the sorted order: the first i items form S
# (numerator sum(A)^2), the rest form S' (denominator sum(B)).
for i in range(len(A_p)):
    cur_s = (sum(A_p[:i]) ** 2) / sum(B_p[i:])
    if cur_s >= max_s:
        print(cur_s)
        max_s = cur_s
        part_ans = i

# BUG FIX: report the best recorded split (part_ans); the original
# printed with the loop index i, which part_ans was meant to replace
# (it was assigned but never used).
print(f'The partitions are: {sorted_idx[:part_ans]} and {sorted_idx[part_ans:]}')
推荐阅读
- java - 如何在现有存储库上使用 JGit 从原点提取
- r - AWS Rstudio 中的项目
- angular - 从 angular4 升级到 angular 5 后 npm 测试失败“TypeScript 编译中缺少 test.ts”
- ffmpeg - FFmpeg android - Fontconfig 错误:无法加载默认配置文件
- reactjs - 如何处理 ReactJS 中按钮单击的导航?
- docker - 将日志文件从容器复制回主机
- math - 按频率标准化分数
- android - Firebase facebook 身份验证对我来说无法正常工作
- qt - cc1plus.exe:分配 536875007 个字节的内存不足
- difference - Ab Initio 和 Data Stage 的比较