首页 > 解决方案 > 优化正则表达式中固定字符串替代的长列表

问题描述

我有一个在某些时区选择时间的表达式,我在下面提到了我的表达式:

/(\d+)(?:[.: ](\d\d))*\s*([aApP][.: ]?[mM][.: ]?|o'clock |o’clock)*\s*(PT|AT|PST|PDT|UTC|EST)/gm;

在这个表达式中,我只使用PT|AT|PST|PDT|UTC|EST时区,但我想添加基于时区缩写的所有列表 - 全球列表。我应该在表达式中插入所有列表吗?有没有更好的解决方案?

标签: javascript, regex, optimization

解决方案


您可以使用 MyRegexTester.com 站点上的单词构建正则表达式:

  1. 从您信任的站点中获取所有时区的列表,作为单词列表,每个单词都在自己的行中
  2. 转到https://www.myregextester.com/index.php
  3. 在“HIGHLIGHT MATCHES”复选框正下方的下拉框中选择 WORDLIST(原文此处附有截图)
  4. 粘贴时区字符串列表,然后单击提交按钮(原文此处为按钮截图)

你会得到类似的东西

^(?:A(?:C(?:WS|[DS])?T|DT|E[DS]?T|FT|K[DS]T|LMT|M[S]?T|NAS?T|QTT|RT|ST|W[DS]T|Z(?:OS|[OS])?T|oE|T)|B(?:RS|[NORST])T|C(?:AS|ES|H(?:A[DS]|OS|[OU])|ID?S|LS|hS|[ACDEKLOSVX])?T|D(?:AV|DU)T|E(?:ASS?|ES|GS|[ACDEGS])?T|F(?:JS|KS|[EJKN])T|G(?:A[LM]|IL|[EFMSY])T|H(?:OVS?|[DKS])T|I(?:R(?:KS|[DKS])|[CDOS])T|JST|K(?:OS|RAS?|UY|[GS])T|L(?:H[DS]|IN)T|M(?:A(?:GS|[GRW])T|DT|HT|MT|S[DKT]|UT|VT|YT|T)|N(?:FD|OVS?|Z[DS]|[CDFPRSU])T|O(?:MSS?|RA)T|P(?:ETS?|HO|M[DS]|ON|YS|[DEGHKSWY])?T|QYZT|R(?:OT|E)T|S(?:A[KMS]|RE|YO|[BCGRS])T|T(?:AH|OS|[FJKLMORV])T|U(?:LAS?T|TC|YS?T|ZT)|V(?:LAS?|OS|[EU])T|W(?:A(?:RS|[KS])?T|ES?T|FT|GS?T|I(?:TA|[BT])|ST|T)|Y(?:A(?:KS|[KP])|EKS?)T|[ABCDEFGHIKLMNOPQRSTUVWXYZ])$

这个模式编码了 244 个时区缩写。

现在,是时候构建最终的正则表达式了。先去掉上面模式中的锚点:当该模式用在另一个正则表达式的中间时,不需要 ^ 和 $。

// Alternation covering the time zone abbreviations, taken from the generated
// word-list pattern with the ^ and $ anchors removed so that it can be
// embedded inside a larger expression.
const tz = "(?:A(?:C(?:WS|[DS])?T|DT|E[DS]?T|FT|K[DS]T|LMT|M[S]?T|NAS?T|QTT|RT|ST|W[DS]T|Z(?:OS|[OS])?T|oE|T)|B(?:RS|[NORST])T|C(?:AS|ES|H(?:A[DS]|OS|[OU])|ID?S|LS|hS|[ACDEKLOSVX])?T|D(?:AV|DU)T|E(?:ASS?|ES|GS|[ACDEGS])?T|F(?:JS|KS|[EJKN])T|G(?:A[LM]|IL|[EFMSY])T|H(?:OVS?|[DKS])T|I(?:R(?:KS|[DKS])|[CDOS])T|JST|K(?:OS|RAS?|UY|[GS])T|L(?:H[DS]|IN)T|M(?:A(?:GS|[GRW])T|DT|HT|MT|S[DKT]|UT|VT|YT|T)|N(?:FD|OVS?|Z[DS]|[CDFPRSU])T|O(?:MSS?|RA)T|P(?:ETS?|HO|M[DS]|ON|YS|[DEGHKSWY])?T|QYZT|R(?:OT|E)T|S(?:A[KMS]|RE|YO|[BCGRS])T|T(?:AH|OS|[FJKLMORV])T|U(?:LAS?T|TC|YS?T|ZT)|V(?:LAS?|OS|[EU])T|W(?:A(?:RS|[KS])?T|ES?T|FT|GS?T|I(?:TA|[BT])|ST|T)|Y(?:A(?:KS|[KP])|EKS?)T|[ABCDEFGHIKLMNOPQRSTUVWXYZ])";

// Time (hours, optional minute groups, optional am/pm or o'clock marker)
// followed by a time zone abbreviation.
// The trailing \b is required because the zone list contains single-letter
// zones: without it the pattern accepts the first capital letter of any word
// as a zone (e.g. "10 Apples" -> "10 A", "at 5 PM" -> "5 P").
const regex = new RegExp(String.raw`(\d+)(?:[.: ](\d\d))*\s*([aApP][.: ]?[mM][.: ]?|o['’]clock)*\s*${tz}\b`, 'gm');
console.log(regex.source);

我不确定正则表达式最终是否适合您,请在线查看其演示,但现在您了解如何以最佳方式在正则表达式模式中使用文字字符串模式的集合。

要在代码中动态构建正则表达式字典树(regex trie),您需要类似 trie-regex 的库。还有其他工具也能根据字符串列表生成正则表达式字典树。

下面,您可以为给定的逗号或换行符分隔的单词列表生成正则表达式树

// Browserify-style bundle behind the demo form. It contains three modules:
//   1 - jsesc 0.5.0 (minified vendor code, string escaping),
//   2 - regex-trie (minified vendor code, builds a trie of phrases and
//       serialises it as a regular expression),
//   3 - the page's own entry point, which wires the "Generate" button.
// The vendor modules are kept on single lines on purpose: a line break right
// after a `return` keyword inside minified code triggers automatic semicolon
// insertion and silently turns the callback into `return undefined`.
// Relies on a global jQuery `$` being loaded by the page.
!function(){return function t(e,n,r){function i(s,u){if(!n[s]){if(!e[s]){var c="function"==typeof require&&require;if(!u&&c)return c(s,!0);if(o)return o(s,!0);var f=new Error("Cannot find module '"+s+"'");throw f.code="MODULE_NOT_FOUND",f}var a=n[s]={exports:{}};e[s][0].call(a.exports,function(t){return i(e[s][1][t]||t)},a,a.exports,t,e,n,r)}return n[s].exports}for(var o="function"==typeof require&&require,s=0;s<r.length;s++)i(r[s]);return i}}()({
// Module 1: jsesc 0.5.0 (vendor, minified).
1:[function(t,e,n){(function(t){(function(){!function(r){var i="object"==typeof n&&n,o="object"==typeof e&&e&&e.exports==i&&e,s="object"==typeof t&&t;s.global!==s&&s.window!==s||(r=s);var u={},c=u.hasOwnProperty,f=function(t,e){var n;for(n in t)c.call(t,n)&&e(n,t[n])},a=u.toString,p={'"':'\\"',"'":"\\'","\\":"\\\\","\b":"\\b","\f":"\\f","\n":"\\n","\r":"\\r","\t":"\\t"},_=/["'\\\b\f\n\r\t]/,h=/[0-9]/,l=/[ !#-&\(-\[\]-~]/,d=function(t,e){var n,r,i={escapeEverything:!1,quotes:"single",wrap:!1,es6:!1,json:!1,compact:!0,indent:"\t",__indent__:""},o=e&&e.json;o&&(i.quotes="double",i.wrap=!0),n=i,"single"!=(e=(r=e)?(f(r,function(t,e){n[t]=e}),n):n).quotes&&"double"!=e.quotes&&(e.quotes="single");var s,u,c,g="double"==e.quotes?'"':"'",y=e.compact,v=e.indent,b=y?"":"\n",j=!0;if(o&&t&&("function"==typeof(c=t.toJSON)||"[object Function]"==a.call(c))&&(t=t.toJSON()),!function(t){return"string"==typeof t||"[object String]"==a.call(t)}(t))return function(t){return"[object Array]"==a.call(t)}(t)?(u=[],e.wrap=!0,s=e.__indent__,v+=s,e.__indent__=v,function(t,e){for(var n=t.length,r=-1;++r<n;)e(t[r])}(t,function(t){j=!1,u.push((y?"":v)+d(t,e))}),j?"[]":"["+b+u.join(","+b)+b+(y?"":s)+"]"):function(t){return"[object Object]"==a.call(t)}(t)?(u=[],e.wrap=!0,s=e.__indent__,v+=s,e.__indent__=v,f(t,function(t,n){j=!1,u.push((y?"":v)+d(t,e)+":"+(y?"":" ")+d(n,e))}),j?"{}":"{"+b+u.join(","+b)+b+(y?"":s)+"}"):o?JSON.stringify(t)||"null":String(t);var w,m,x=t,q=-1,E=x.length;for(u="";++q<E;){var S=x.charAt(q);if(e.es6&&(w=x.charCodeAt(q))>=55296&&w<=56319&&E>q+1&&(m=x.charCodeAt(q+1))>=56320&&m<=57343)u+="\\u{"+(1024*(w-55296)+m-56320+65536).toString(16).toUpperCase()+"}",q++;else{if(!e.escapeEverything){if(l.test(S)){u+=S;continue}if('"'==S){u+=g==S?'\\"':S;continue}if("'"==S){u+=g==S?"\\'":S;continue}}if("\0"!=S||o||h.test(x.charAt(q+1)))if(_.test(S))u+=p[S];else{var O=S.charCodeAt(0).toString(16).toUpperCase(),A=O.length>2||o,N="\\"+(A?"u":"x")+("0000"+O).slice(A?-4:-2);u+=N}else u+="\\0"}}return e.wrap&&(u=g+u+g),u};d.version="0.5.0","function"==typeof define&&"object"==typeof define.amd&&define.amd?define(function(){return d}):i&&!i.nodeType?o?o.exports=d:i.jsesc=d:r.jsesc=d}(this)}).call(this)}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{}],
// Module 2: regex-trie (vendor, minified). In _to_regex the `every` callback
// must keep `return 1===t.length` on one line so that sibling single-character
// branches are emitted as a character class ("[ab]") instead of "(?:a|b)".
2:[function(t,e,n){var r=t("jsesc"),i=function(){"use strict";var t=function(){return this instanceof t?(this._num_phrases_in_trie=0,this._trie={},this):new t};return t.prototype.add=function(t){if(t instanceof Array&&t.forEach(this.add,this),t=this._coerce_to_string(t),!this._is_phrase_valid(t))return this;if(this.contains(t))return this;var e=this._trie;return t.split("").forEach(function(t){t in e?e=e[t]:(e[t]={},e=e[t])},this),e.end=!0,this._num_phrases_in_trie++,this},t.prototype.toRegExp=function(){if(0!==this._num_phrases_in_trie){var t=this.toString();return new RegExp(t)}},t.prototype.toString=function(){if(0!==this._num_phrases_in_trie){var t=function(e,n){var r=Object.keys(e),i=[],o=[],s=!1;return r.forEach(function(u){var c;"end"!==u?(c=this._quotemeta(u)+t(e[u],n),(r.length>1?[].push.bind(i):[].push.bind(o))(c)):s=!0},n),n._to_regex(i,o,s)};return t(this._trie,this)}},t.prototype._to_regex=function(t,e,n){var r="";return t.length>0?1===t.length?r+=t[0]:t.every(function(t){return 1===t.length})?r+="["+t.join("")+"]":r+="(?:"+t.join("|")+")":e.length>0&&(r+=e[0]),n&&r&&(1===r.length?r+="?":r="(?:"+r+")?"),r},t.prototype.contains=function(t){if(!this._is_phrase_valid(t)&&this._num_phrases_in_trie>0)return!1;var e=this._trie;try{return t.split("").forEach(function(t){e=e[t]}),e.hasOwnProperty("end")&&!0===e.end}catch(t){}return!1},t.prototype._coerce_to_string=function(t){return"number"!=typeof t||isNaN(t)||(t=t.toString()),t},t.prototype._is_phrase_valid=function(t){return"string"==typeof t&&t.length>0},t.prototype._quotemeta=function(t){return this._is_phrase_valid(t)?t.replace(/([\t\n\f\r\\\$\(\)\*\+\-\.\?\[\]\^\{\|\}])/g,"\\$1").replace(/[^\x20-\x7E]/g,r):t},t}();e.exports=i},{jsesc:1}],
// Module 3: entry point. `t` is the bundle-local require function.
3:[function(t,e,n){
  $(document).ready(function () {
    // On "Generate": split the textarea on commas/line breaks, build the trie
    // and show the resulting pattern, HTML-escaped, inside #result.
    $("#run").click(function (event) {
      event.preventDefault();
      $("#result").html("");

      const words = $("#inputdata").val().match(/[^\r\n,]+/g);
      // match() yields null when the textarea holds no words; the trie would
      // then return no RegExp at all, so leave the result area empty.
      if (words === null) {
        return;
      }

      const RegexTrie = t("regex-trie");
      const trie = new RegexTrie();
      trie.add(words);

      const escapedSource = trie
        .toRegExp()
        .source.replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
      $("#result").append("<b>" + escapedSource + "</b><br/>");
    });
  });
},{"regex-trie":2}]},{},[3]);
<!-- Demo form for the generator script: jQuery provides the $ used by its click handler. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<form>
  <p>Add the comma or linebreak separated words to the text area and click <i>Generate</i>.</p>
  <textarea id="inputdata"></textarea>
  <br/>
  <button id="run">Generate</button>
  <!-- <code> is not a void element, so it needs an explicit end tag. -->
  <pre><code id="result"></code></pre>
</form>


推荐阅读