c# - 我正在尝试使用 powerset 构造和处理由 unicode 引入的大范围在 C# 中进行 NFA->DFA 翻译
问题描述
我使用单个字符值成功地在 DFA 上实现了 powerset 构造(我认为是霍夫曼最小化),但我已经修改了 FA 状态以能够处理 unicode 范围。现在它们看起来像这样:
sealed partial class FA
{
public readonly Dictionary<KeyValuePair<int,int>,FA> InputTransitions = new Dictionary<KeyValuePair<int,int>,FA>();
public readonly HashSet<FA> EpsilonTransitions = new HashSet<FA>();
public int AcceptSymbol = -1;
public bool IsAccepting = false;
我的字符存储为 UTF-32 值整数,因此我不必处理代理项每个输入转换都可以采用一个范围,其中包含编码为键值对的开始和结束 UTF-32 值
我已经修改了我的 ToDfa() 例程(以前适用于单个值)以使用范围,但它不能正常工作。NormalizeSortedRangeList() 采用键值对范围的预排序列表并组合重叠范围(将多个重叠范围替换为包含所有这些范围的单个范围)。_SetComparer 只是对列表和集合进行无序比较
public FA ToDfa(IProgress<FAProgress> progress=null)
{
// if it's already a DFA we don't need to do this transformation.
// however, we still need to clone the state machine it because
// the consumer expects a copy, not the original state.
if (IsDfa)
return Clone();
// The DFA states are keyed by the set of NFA states they represent.
var dfaMap = new Dictionary<List<FA>, FA>(_SetComparer.Default);
var unmarked = new HashSet<FA>();
// compute the epsilon closure of the initial state in the NFA
var states = new List<FA>();
FillEpsilonClosure(states);
// create a new state to represent the current set of states. If one
// of those states is accepting, set this whole state to be accepting.
FA dfa = new FA();
var al = new List<int>();
// find the accepting symbols for the current states
foreach (var fa in states)
if (fa.IsAccepting)
if (!al.Contains(fa.AcceptSymbol))
al.Add(fa.AcceptSymbol);
// here we assign the appropriate accepting symbol
int ac = al.Count;
//if (1 == ac)
if(0<ac)
dfa.AcceptSymbol = al[0];
//else if (1 < ac)
// dfa.AcceptSymbol = al[0]; // could throw, just choose the first one
dfa.IsAccepting = 0 < ac;
FA result = dfa; // store the initial state for later, so we can return it.
// add it to the dfa map
dfaMap.Add(states, dfa);
// add it to the unmarked states, signalling that we still have work to do.
unmarked.Add(dfa);
bool done = false;
var j = 0;
while (!done)
{
if(null!=progress)
{
progress.Report(new FAProgress(FAStatus.DfaTransform, j));
}
done = true;
// a new hashset used to hold our current key states
var mapKeys = new HashSet<List<FA>>(dfaMap.Keys, _SetComparer.Default);
foreach (var mapKey in mapKeys)
{
dfa = dfaMap[mapKey];
if (unmarked.Contains(dfa))
{
// when we get here, mapKey represents the epsilon closure of our
// current dfa state, which is indicated by kvp.Value
// build the transition list for the new state by combining the transitions
// from each of the old states
// retrieve every possible input for these states
var inputs = new List<KeyValuePair<int,int>>();
foreach (var state in mapKey)
{
foreach (var trns in state.InputTransitions)
if(!inputs.Contains(trns.Key))
inputs.Add(trns.Key);
}
inputs.Sort((x, y) => {
var c = x.Key.CompareTo(y.Key);
if (0 == c)
c = x.Value.CompareTo(y.Value);
return c;
});
RangeUtility.NormalizeSortedRangeList(inputs);
// for each input, create a new transition
foreach (var input in inputs)
{
var acc = new List<int>();
var ns = new List<FA>();
foreach (var state in mapKey)
{
foreach (var trns in state.InputTransitions)
{
if (RangeUtility.Intersects(trns.Key, input))
{
FA dst = trns.Value;
foreach (var d in dst.FillEpsilonClosure())
{
// add the accepting symbols
if (d.IsAccepting)
if (!acc.Contains(d.AcceptSymbol))
acc.Add(d.AcceptSymbol);
if (!ns.Contains(d))
ns.Add(d);
}
}
}
}
FA ndfa;
if (!dfaMap.TryGetValue(ns, out ndfa))
{
ac = acc.Count;
ndfa = new FA(0 < ac);
// assign the appropriate accepting symbol
//if (1 == ac)
if(0<ac)
ndfa.AcceptSymbol = acc[0];
//else if (1 < ac)
// ndfa.AcceptSymbol = acc[0]; // could throw, instead just set it to the first state's accept
dfaMap.Add(ns, ndfa);
// work on this new state
unmarked.Add(ndfa);
done = false;
}
dfa.InputTransitions.Add(input, ndfa);
}
// we're done with this state
unmarked.Remove(dfa);
}
}
++j;
}
return result;
}
它有点作用,但不是真的。问题是我的范围处理很糟糕,但我不太清楚如何。我已经为我的州倾倒了 GraphViz dot,但我无法理解它哪里出了问题。
我尝试查看 Brics (Java) 和 Fare (C#),但我无法理解他们的 NFA->DFA 例程,这些例程创建了一堆垃圾状态,然后将其删除。
我不完全确定我需要为你澄清什么,读者,因为我对问题领域的理解不够好,所以请在评论中发表你对我的任何问题,我会尽力而为.
此处提供完整源代码https://github.com/codewitch-honey-crisis/Rolex/tree/master/Rolex/FA
你到底是如何在动力装置构造中做范围的?几个图书馆都这样做了,我想不出来
解决方案
我终于设法在我自己的代码中采用 Fare/bric 的解决方案
// Portions of this code adopted from Fare, itself adopted from brics
// original copyright notice included.
// This is the only file this applies to.
/*
* dk.brics.automaton
*
* Copyright (c) 2001-2011 Anders Moeller
* All rights reserved.
* http://github.com/moodmosaic/Fare/
* Original Java code:
* http://www.brics.dk/automaton/
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
public static FA _Determinize(FA fa, IProgress<FAProgress> progress = null)
{
if (null != progress)
progress.Report(new FAProgress(FAStatus.DfaTransform, 0));
var p = new HashSet<int>();
var closure = new List<FA>();
fa.FillClosure(closure);
for(int ic=closure.Count,i=0;i<ic;++i)
{
var ffa = closure[i];
p.Add(0);
foreach (var t in ffa.InputTransitions)
{
p.Add(t.Key.Key);
if (t.Key.Value < 0x10ffff)
{
p.Add((t.Key.Value + 1));
}
}
}
var points = new int[p.Count];
p.CopyTo(points, 0);
Array.Sort(points);
var comparer = _SetComparer.Default;
var sets = new Dictionary<ICollection<FA>, ICollection<FA>>(comparer);
var working = new Queue<ICollection<FA>>();
var dfaMap = new Dictionary<ICollection<FA>, FA>(comparer);
var initial = fa.FillEpsilonClosure();
sets.Add(initial, initial);
working.Enqueue(initial);
var result = new FA();
foreach(var afa in initial)
{
if(afa.IsAccepting)
{
result.IsAccepting = true;
result.AcceptSymbol = afa.AcceptSymbol;
break;
}
}
dfaMap.Add(initial, result);
var j = 1;
while (working.Count > 0)
{
ICollection<FA> s = working.Dequeue();
var ecs = FillEpsilonClosure(s);
FA dfa;
dfaMap.TryGetValue(s, out dfa);
foreach (FA q in ecs)
{
if (q.IsAccepting)
{
dfa.IsAccepting = true;
dfa.AcceptSymbol = q.AcceptSymbol;
break;
}
}
for (var i = 0; i < points.Length; i++)
{
var set = new HashSet<FA>();
foreach (FA c in ecs)
{
foreach (var trns in c.InputTransitions)
{
if (trns.Key.Key <= points[i] && points[i] <= trns.Key.Value)
{
foreach (var efa in trns.Value.FillEpsilonClosure())
set.Add(trns.Value);
}
}
}
if (!sets.ContainsKey(set))
{
sets.Add(set, set);
working.Enqueue(set);
dfaMap.Add(set, new FA());
}
FA dst;
dfaMap.TryGetValue(set, out dst);
int first = points[i];
int last;
if (i + 1 < points.Length)
last = (points[i + 1] - 1);
else
last = 0x10ffff;
dfa.InputTransitions.Add(new KeyValuePair<int, int>(first, last), dst);
}
if (null != progress)
progress.Report(new FAProgress(FAStatus.DfaTransform, j));
++j;
}
// remove dead transitions
foreach(var ffa in result.FillClosure())
{
var itrns = new List<KeyValuePair<KeyValuePair<int, int>, FA>>(ffa.InputTransitions);
ffa.InputTransitions.Clear();
foreach(var trns in itrns)
{
if(null!=trns.Value.FirstAcceptingState)
{
ffa.InputTransitions.Add(trns.Key, trns.Value);
}
}
if (null != progress)
progress.Report(new FAProgress(FAStatus.DfaTransform, j));
++j;
}
return result;
}
推荐阅读
- java - 转换类没有效果
- c - 编写一个 C 程序,接受来自用户的一些整数并找到最大值和输入位置
- c++ - C++:引用和智能指针 - 有没有智能引用之类的东西?
- nginx - Nginx 位置和反向代理配置无法正常工作
- python - 如何从excel中的单元格中选择所有文本
- javascript - 如何随机化 (question_options) 数组中的值并使用单选按钮显示它们以进行特定测试?
- java - 在 gradle 中构建具有两个或多个根的多模块项目
- r - 如何将无监督的层次聚类结果与原始数据合并
- reactjs - 组件未显示在带有 laravel 的反应路由器中
- java - 在java中,gas * price + value 的资金不足