首页 > 解决方案 > 允许在一个跃点中完全绑定任何 6 元组模式的最小索引集是什么?

问题描述

我正在尝试在wiredtiger 之上构建一个6 元组存储。元组可以描述如下:

(graph, subject, predicate, object, alive, transaction)

存储在数据库中的每个元组都是唯一的。

除了数据库存储 6 个元组之外,查询类似于常规 SPARQL 查询。元组的零个或多个元素可以是可变的。这是一个示例查询,允许检索特定事务引入的所有更改P4X432

SELECT ?graph ?subject ?predicate ?object ?alive
WHERE 
{
  ?graph ?subject ?predicate ?object ?alive "P4X432"
}

考虑所有可能的模式最终会考虑以下所有组合:

(graph, subject, predicate, object, alive, transaction)

这是由以下函数给出的:

def combinations(tab):
    out = []
    for i in range(1, len(tab) + 1):
        out.extend(x for x in itertools.combinations(tab, i))

    assert len(out) == 2**len(tab) - 1
    return out

在哪里:

print(len(combinations(('graph', 'subject', 'predicate', 'object', 'alive', 'transaction'))))

展示:

63

那就是 6 元组的 63 种组合。我可以使用缺少的元组项来完成每个索引,例如以下组合:

('graph', 'predicate', 'transaction')

将与以下索引相关联:

('graph', 'predicate', 'transaction', 'subject', 'alive', 'object')

但我知道 6 元组的所有排列中有一个较小的子集具有以下属性:

{1, 2, ..., n} 的一组 n 排列,其中 {1, 2, ..., n} 的所有组合是该集合中至少一个元素的前缀排列。

否则说,所有组合都有一个排列,它是集合中一个元素的前缀。

我使用蛮力算法发现一组具有该属性的大小为 25(低于 63)的集合:

((5 0 1 2 3 4) (4 5 0 1 2 3) (3 4 5 0 1 2) (2 3 4 5 0 1) (1 2 3 4 5 0) (0 1 2 3 4 5) (0 2 1 3 4 5) (0 3 2 1 5 4) (0 4 3 1 5 2) (0 4 2 3 1 5) (2 1 5 3 0 4) (3 2 1 5 0 4) (3 1 4 5 0 2) (3 1 5 4 2 0) (3 0 1 4 2 5) (3 5 2 0 1 4) (4 3 1 0 2 5) (4 2 1 5 3 0) (4 1 0 2 5 3) (4 5 2 1 0 3) (5 4 1 2 3 0) (5 3 0 1 4 2) (5 2 1 3 4 0) (5 1 2 4 0 3) (5 0 2 4 3 1))

这是我用来计算该解决方案的 r7rs 方案程序:

(define-library (indices)
  (export indices)
  (export permutations)
  (export combination)
  (export combinations)

  (export run)

  (import (only (chezscheme) trace-define trace-lambda random trace-let))

  (import (scheme base))
  (import (scheme list))
  (import (scheme comparator))
  (import (scheme hash-table))
  (import (scheme process-context))

  (import (scheme write))

  (begin

    (define (combination k lst)
      (cond
       ((= k 0) '(()))
       ((null? lst) '())
       (else
        (let ((head (car lst))
              (tail (cdr lst)))
          (append (map (lambda (y) (cons head y)) (combination (- k 1) tail))
                  (combination k tail))))))

    (define (factorial n)
      (let loop ((n n)
                 (out 1))
        (if (= n 0)
            out
            (loop (- n 1) (* n out)))))


    (define (%binomial-coefficient n k)
      ;; https://en.wikipedia.org/wiki/Binomial_coefficient#Multiplicative_formula
      (let loop ((i 1)
                 (out 1))
        (if (= i (+ k 1))
            out
            (loop (+ i 1) (* out (/ (- (+ n 1) i) i))))))

    (define (memo proc)
      (let ((m (make-hash-table (make-equal-comparator))))
        (lambda args
          (if (hash-table-contains? m args)
              (hash-table-ref m args)
              (let ((v (apply proc args)))
                (hash-table-set! m args v)
                v)))))

    (define binomial-coefficient
      (memo
       (lambda (n k)
         (cond
          ((= n k) 1)
          ((= k 0) 1)
          (else (%binomial-coefficient n k))))))

    ;; k-combination ranking and unranking procedures according to
    ;; https://en.wikipedia.org/wiki/Combinatorial_number_system

    (define (ranking lst)
      (let loop ((lst (sort < lst)) ;; increasing sequence
                 (k 1)
                 (out 0))
        (if (null? lst)
            out
            (loop (cdr lst) (+ k 1) (+ out (binomial-coefficient (car lst) k))))))

    (define (%unranking k N)
      (let loop ((n (- k 1)))
                 (if (< N (binomial-coefficient (+ n 1) k))
                     n
                     (loop (+ n 1)))))

    (define (unranking k N)
      (let loop ((k k)
                       (N N)
                       (out '()))
                 (if (= k 0)
                     out
                     (let ((m (%unranking k N)))
                       (loop (- k 1) (- N (binomial-coefficient m k)) (cons m out))))))

    (define fresh-random
      (let ((memo (make-hash-table (make-eqv-comparator))))
        (lambda (n)
          (when (= (hash-table-size memo) n)
            (error 'oops "no more fresh number" n
                   ))
          (let loop ()
            (let ((r (random n)))
              (if (hash-table-contains? memo r)
                  (loop)
                  (begin (hash-table-set! memo r #t) r)))))))

    (define (random-k-combination k n)
      (unranking k (fresh-random (binomial-coefficient n k))))

    (define (combinations lst)
      (if (null? lst) '(())
          (let* ((head (car lst))
                 (tail (cdr lst))
                 (s (combinations tail))
                 (v (map (lambda (x) (cons head x)) s)))
            (append s v))))

    ;; (define (combinations lst)
    ;;   (append-map (lambda (k) (combination k lst)) (iota (length lst))))

    (define (permutations s)
      ;; http://rosettacode.org/wiki/Permutations#Scheme
      (cond
       ((null? s) '(()))
       ((null? (cdr s)) (list s))
       (else ;; extract each item in list in turn and permutations the rest
        (let splice ((l '()) (m (car s)) (r (cdr s)))
          (append
           (map (lambda (x) (cons m x)) (permutations (append l r)))
           (if (null? r) '()
               (splice (cons m l) (car r) (cdr r))))))))

    (define (shift lst index)
      (append (drop lst index) (take lst index)))

    (define (rotations lst)
      (reverse! (map (lambda (index) (shift lst index)) (iota (length lst)))))

    (define (prefix? lst other)
      "Return #t if LST is prefix of OTHER"
      (let prefix ((lst lst)
                 (other other))
        (if (null? lst)
            #t
            (if (= (car lst) (car other))
                (prefix (cdr lst) (cdr other))
                #f))))

    (define (indices lst)
      (let ((candidates (permutations lst)))
        (let loop ((out (rotations lst)) ;; all rotations are solutions
                   (combinations (combinations lst)))
          (if (null? combinations)
              (reverse! out)
              (let ((permutations (permutations (car combinations))))
                (if (any (lambda (s) (any (lambda (p) (prefix? p s)) permutations)) out)
                    ;; there is an existing "solution" for the
                    ;; permutations of COMBINATION move to the next
                    ;; combination
                    (loop out (cdr combinations))
                    (loop (cons (find (lambda (c) (if (member c out)
                                                      #f
                                                      (any (lambda (p) (prefix? p c)) permutations)))
                                      candidates)
                                out)
                          (cdr combinations))))))))


    (define (permutation-prefix? c o)
      (any (lambda (p) (prefix? p o)) (permutations c)))

    (define (ok? combinations candidate)
      (every (lambda (c) (any (lambda (p) (permutation-prefix? c p)) candidate)) combinations))

    (define (run)
      (let* ((n (string->number (cadr (command-line))))
             (N (iota n))
             (solution (indices N))
             (min (length solution))
             (rotations (rotations N))
             (R (length rotations))
             ;; other stuff
             (cx (combinations N))
             (px (filter (lambda (x) (not (member x rotations))) (permutations N)))
             ;; other other stuff
             (pn (length px))
             (PN (iota pn)))
        (display "(length solution) => ") (display (length solution))
        (display "\n")
        (display "(length rotations) => ") (display R)
        (display "\n")
        (let try ((x (- (length solution) 1)))
          (let ((count (binomial-coefficient pn (- x R))))
            (let loop ((index 0)
                       (cxx (map (lambda (x) (list-ref px x)) (random-k-combination (- x R) pn))))
              (when (= (modulo index (expt 10 5)) 0)
                (display "n=") (display n) (display " x=") (display x)
                (display " ")
                (display index) (display "/") (display count)  (display "\n"))
              (let ((candidate (append rotations cxx)))
                (let ((continue? (not (ok? cx candidate))))
                  (if continue?
                      (loop (+ index 1)
                            (map (lambda (x) (list-ref px x)) (random-k-combination (- x R) pn)))
                      (begin (display "new solution n=") (display n)
                             (display " length=") (display x)
                             (display " ") (display candidate)
                             (display "\n")
                             (try (- x 1)))))))))))

    ))

使用该排列列表,我可以查询任何模式。

我想知道是否有一个较小的集合以及是否有确定的算法来计算这种集合。

标签: algorithmschemesparqlkey-value-storetriplestore

解决方案


基于这个答案https://math.stackexchange.com/a/3146793/23663

以下程序根据 math ™ 生成一个最小解:

    import itertools
    import math
    
    
    f = math.factorial
    bc = lambda n, k: f(n) // f(k) // f(n-k) if k<n else 0
    
    
    def pk(*args):
        print(*args)
        return args[-1]
    
    
    def stringify(iterable):
        return ''.join(str(x) for x in iterable)
    
    
    def combinations(tab):
        out = []
        for i in range(1, len(tab) + 1):
            out.extend(stringify(x) for x in itertools.combinations(tab, i))
        assert len(out) == 2**len(tab) - 1
        return out
    
    
    def ok(solutions, tab):
        cx = combinations(tab)
    
        px = [stringify(x) for x in itertools.permutations(tab)]
    
        for combination in cx:
            pcx = [''.join(x) for x in itertools.permutations(combination)]
            # check for existing solution
            for solution in solutions:
                if any(solution.startswith(p) for p in pcx):
                    # yeah, there is an existing solution
                    break
            else:
                print('failed with combination={}'.format(combination))
                break
        else:
            return True
        return False
    
    
    def run(n):
        tab = list(range(n))
        cx = list(itertools.combinations(tab, n//2))
        for c in cx:
            L = [(i, i in c) for i in tab]
            A = []
            B = []
            while True:
                for i in range(len(L) - 1):
                    if (not L[i][1]) and L[i + 1][1]:
                        A.append(L[i + 1][0])
                        B.append(L[i][0])
                        L.remove((L[i + 1][0], True))
                        L.remove((L[i][0], False))
                        break
                else:
                    break
            l = [i for (i, _) in L]
            yield A + l + B
    
    
    
    for i in range(7):
        tab = stringify(range(i))
        solutions = [stringify(x) for x in run(i)]
        assert ok(solutions, tab)
        print("n={}, size={}, solutions={}".format(i, len(solutions), solutions))

上述程序输出为:

n=0, size=1, solutions=['']
n=1, size=1, solutions=['0']
n=2, size=2, solutions=['01', '10']
n=3, size=3, solutions=['012', '120', '201']
n=4, size=6, solutions=['0123', '2031', '3012', '1230', '1302', '2310']
n=5, size=10, solutions=['01234', '20341', '30142', '40123', '12340', '13402', '14203', '23410', '24013', '34021']
n=6, size=20, solutions=['012345', '301452', '401253', '501234', '203451', '240513', '250314', '340521', '350124', '450132', '123450', '142503', '152304', '134502', '135024', '145032', '234510', '235104', '245130', '345210']

推荐阅读