首页 > 技术文章 > 工具类

krockey 2018-05-23 17:07 原文

文本处理工具类

对网上常见的文本处理做了整理,以备后用,持续更新中……

    /// <summary>
    /// Text normalization helpers: full-width to half-width folding, stripping of HTML tags,
    /// punctuation and whitespace, escaping of regex metacharacters, and conversion between
    /// numbers and Chinese numerals.
    /// </summary>
    public class TextFormat
    {
        // Characters that ToUnicode rewrites as \uXXXX escapes.
        private const string RegexMetaChars = ".$^{[(|)*+?\\";

        // Everyday and financial (anti-forgery) Chinese digits; the index of a character is its value.
        private const string PlainDigits = "零一二三四五六七八九";
        private const string FinancialDigits = "零壹贰叁肆伍陆柒捌玖";

        /// <summary>
        /// Converts full-width characters to their half-width equivalents.
        /// </summary>
        /// <param name="text">Text to convert; must not be null.</param>
        /// <returns>
        /// A copy of <paramref name="text"/> in which the ideographic space (U+3000) becomes an
        /// ASCII space and U+FF01..U+FF5E are shifted down by 0xFEE0 onto ASCII '!'..'~'.
        /// All other characters are left unchanged.
        /// </returns>
        public static string ToDBC(string text)
        {
            char[] chars = text.ToCharArray();
            for (int i = 0; i < chars.Length; i++)
            {
                char ch = chars[i];
                if (ch == '\u3000')
                {
                    chars[i] = ' ';
                }
                else if (ch >= '\uFF01' && ch <= '\uFF5E')
                {
                    chars[i] = (char)(ch - 0xFEE0);
                }
            }
            return new string(chars);
        }

        /// <summary>
        /// Removes HTML tags.
        /// </summary>
        /// <param name="text">Text to filter.</param>
        /// <returns>The text with every "&lt;...&gt;" run (at least one character between the brackets) removed.</returns>
        public static string FilterHtmlTag(string text)
        {
            return Regex.Replace(text, "<[^>]+>", "");
        }

        /// <summary>
        /// Removes every character that is not a CJK unified ideograph (\u4e00-\u9fa5),
        /// an ASCII digit or an ASCII letter.
        /// </summary>
        /// <remarks>
        /// Only the range \u4e00-\u9fa5 is kept by this method; the ranges below are reference notes.
        ///
        /// The Han range commonly quoted online is U4E00-U9FA5, the "CJK Unified Ideographs" block.
        /// To cover everything, the extension ranges 2E80-A4CF, F900-FAFF and FE30-FE4F are also needed.
        /// 2E80-A4CF: CJK Radicals Supplement, Kangxi Radicals, Ideographic Description Characters,
        /// CJK Symbols and Punctuation, Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo, Kanbun,
        /// Bopomofo Extended, CJK Strokes, Katakana Phonetic Extensions, Enclosed CJK Letters and Months,
        /// CJK Compatibility, CJK Unified Ideographs Extension A, Yijing Hexagram Symbols,
        /// CJK Unified Ideographs, Yi Syllables, Yi Radicals.
        /// F900-FAFF: CJK Compatibility Ideographs.
        /// FE30-FE4F: CJK Compatibility Forms.
        ///
        /// GB2312: \u00a4-\u9fa0, \uff01-\uffe5
        ///
        /// See: http://ju.outofmemory.cn/entry/53571
        ///
        /// Standard CJK ideographs: \u3400-\u4DB5, \u4E00-\u9FA5, \u9FA6-\u9FBB, \uF900-\uFA2D, \uFA30-\uFA6A, \uFA70-\uFAD9
        /// Full-width ASCII, full-width punctuation, half-width Katakana, half-width Hiragana, half-width Hangul letters: \uFF00-\uFFEF
        /// CJK Radicals Supplement: \u2E80-\u2EFF
        /// CJK Symbols and Punctuation: \u3000-\u303F
        /// CJK Strokes: \u31C0-\u31EF
        /// Kangxi Radicals: \u2F00-\u2FDF
        /// Ideographic Description Characters: \u2FF0-\u2FFF
        /// Bopomofo: \u3100-\u312F
        /// Bopomofo Extended (Minnan and Hakka): \u31A0-\u31BF
        /// Japanese Katakana: \u30A0-\u30FF (Hiragana is \u3040-\u309F)
        /// Katakana Phonetic Extensions: \u31F0-\u31FF
        /// Hangul Syllables: \uAC00-\uD7AF
        /// </remarks>
        /// <param name="text">Text to filter.</param>
        /// <returns>The text containing only the kept characters.</returns>
        public static string FilterMeanlessWord(string text)
        {
            return Regex.Replace(text, "[^\u4e00-\u9fa50-9a-zA-Z]", "");
        }

        /// <summary>
        /// Removes common ASCII punctuation.
        /// </summary>
        /// <param name="text">Text to filter.</param>
        /// <returns>The text with every character of the punctuation class in the pattern removed.</returns>
        public static string FilterCommonPunctuation(string text)
        {
            return Regex.Replace(text, @"[~!@#\$%\^&\*\(\)\+=\|\\\}\]\{\[:;<,>\?\/""_\-`\.']+", "");
        }

        /// <summary>
        /// Converts the text to lower case.
        /// </summary>
        /// <param name="text">Text to convert; must not be null.</param>
        /// <returns>The lower-cased text.</returns>
        public static string ToLower(string text)
        {
            // Invariant casing keeps the result independent of the thread culture
            // (for example, Turkish cultures lower-case 'I' to a dotless 'ı').
            return text.ToLowerInvariant();
        }

        /// <summary>
        /// Removes all whitespace.
        /// </summary>
        /// <param name="text">Text to filter.</param>
        /// <returns>The text with every run of regex whitespace (\s) removed.</returns>
        public static string FilterSpace(string text)
        {
            return Regex.Replace(text, "[\\s]+", "");
        }

        /// <summary>
        /// Rewrites regex metacharacters as \uXXXX escapes.
        /// </summary>
        /// <param name="text">Text to escape; must not be null.</param>
        /// <returns>
        /// The text in which each of <c>. $ ^ { [ ( | ) * + ? \</c> is replaced by a backslash,
        /// the letter u and the character's four-digit lower-case hexadecimal code unit.
        /// Every other character is copied unchanged.
        /// </returns>
        public static string ToUnicode(string text)
        {
            StringBuilder result = new StringBuilder(text.Length);
            foreach (char ch in text)
            {
                if (RegexMetaChars.IndexOf(ch) >= 0)
                {
                    result.Append("\\u");
                    result.Append(((int)ch).ToString("x4"));
                }
                else
                {
                    result.Append(ch);
                }
            }
            return result.ToString();
        }

        /// <summary>
        /// Converts an amount to Chinese financial (upper-case) numerals.
        /// </summary>
        /// <param name="money">Amount to convert; formatted to two decimal places.</param>
        /// <returns>The amount written with financial numerals, e.g. 12.34 gives "壹拾贰圆叁角肆分".</returns>
        /// <remarks>
        /// NOTE(review): behaviour for zero and negative amounts has not been verified here.
        /// </remarks>
        public static string ToChMoney(double money)
        {
            // Step 1: a custom numeric format interleaves the digits with marker letters A..L,
            // one per unit. The invariant culture is required: step 3 indexes the lookup table
            // by (char - '-'), so a culture-specific decimal separator such as ',' would
            // produce a negative index and throw.
            string s = money.ToString("#L#E#D#C#K#E#D#C#J#E#D#C#I#E#D#C#H#E#D#C#G#E#D#C#F#E#D#C#.0B0A", System.Globalization.CultureInfo.InvariantCulture);
            // Step 2: strip the leading markers and collapse runs of zeros together with their unit markers.
            string d = Regex.Replace(s, @"((?<=-|^)[^1-9]*)|((?'z'0)[0A-E]*((?=[1-9])|(?'-z'(?=[F-L\.]|$))))|((?'b'[F-L])(?'z'0)[0A-L]*((?=[1-9])|(?'-z'(?=[\.]|$))))", "${b}${z}");
            // Step 3: map every remaining character to its glyph; the table starts at '-'.
            return Regex.Replace(d, ".", m => "负圆空零壹贰叁肆伍陆柒捌玖空空空空空空空分角拾佰仟万亿兆京垓秭穰"[m.Value[0] - '-'].ToString());
        }

        /// <summary>
        /// Replaces Chinese digit characters with ASCII digits.
        /// </summary>
        /// <param name="text">Text to convert; must not be null.</param>
        /// <returns>
        /// The text in which each everyday digit (零一二…九) and each financial digit (壹贰…玖)
        /// is replaced by '0'..'9'. This is a per-character substitution: unit characters such as
        /// 十 or 百 and all other characters are copied unchanged.
        /// </returns>
        public static string ChToNum(string text)
        {
            StringBuilder tb = new StringBuilder();

            foreach (char c in text)
            {
                // '一' resembles a dash ('—'); callers that treat it as punctuation
                // may prefer to leave it unconverted.
                int digit = PlainDigits.IndexOf(c);
                if (digit < 0)
                {
                    digit = FinancialDigits.IndexOf(c);
                }

                if (digit >= 0)
                {
                    tb.Append((char)('0' + digit));
                }
                else
                {
                    tb.Append(c);
                }
            }
            return tb.ToString();
        }
    }

RabbitMQ的自定义客户端

  /// <summary>
  /// RabbitMQ helper: caches one <see cref="RabbitMQClient"/> per configuration name and
  /// exposes get / consume / publish / ack / purge / close operations on it.
  /// </summary>
  internal class RabbitMQHelper
  {
      // Guards every read and write of RabbitMQClients and prevents duplicate client creation.
      private static readonly object InitLock = new object();

      // Cache of clients keyed by configuration name. Dictionary is not safe for concurrent
      // access, so it is only touched while InitLock is held.
      private static readonly Dictionary<string, RabbitMQClient> RabbitMQClients = new Dictionary<string, RabbitMQClient>();

      /// <summary>
      /// Returns the cached client for a configuration, creating it when it is missing
      /// or when its channel is no longer open.
      /// </summary>
      /// <param name="cfgName">Configuration name.</param>
      /// <returns>The client, or null when creation failed (the error is logged).</returns>
      private static RabbitMQClient GetMQClient(string cfgName)
      {
          try
          {
              // Lookup, eviction and creation happen under one lock so that two threads can
              // neither corrupt the dictionary nor both create a client for the same name.
              lock (InitLock)
              {
                  RabbitMQClient cached;
                  if (RabbitMQClients.TryGetValue(cfgName, out cached))
                  {
                      if (cached.Channel.IsOpen)
                          return cached;

                      RabbitMQClients.Remove(cfgName);
                  }

                  var newClient = CreateRabbitMQClient(cfgName);
                  RabbitMQClients[cfgName] = newClient;
                  return newClient;
              }
          }
          catch (Exception ex)
          {
              LogTool.WriteError("RabbitMQHelper.GetMQClient", cfgName, ex);
          }
          return null;
      }

      /// <summary>
      /// Builds a client from the connection string stored under <paramref name="cfgName"/>.
      /// </summary>
      /// <remarks>
      /// The connection string is a ';'-separated list of key=value pairs. Recognized keys
      /// (case-insensitive): host, user, pwd, exchange, queue, bindtype, mutex, routing, ttl, declare.
      /// Each pair is split at the first '=', so a value may itself contain '='.
      /// </remarks>
      /// <param name="cfgName">Configuration name.</param>
      /// <returns>A connected client.</returns>
      /// <exception cref="ArgumentNullException">The configuration value is null or empty.</exception>
      /// <exception cref="ArgumentOutOfRangeException">An item is not a non-empty key=value pair.</exception>
      private static RabbitMQClient CreateRabbitMQClient(string cfgName)
      {
          RabbitMQClient mc = new RabbitMQClient();
          var connStr = ApolloConfigUtils.GetConfig(cfgName, "");
          if (string.IsNullOrEmpty(connStr))
              throw new ArgumentNullException("cfgName", "Rabbit的链接配置为空!");

          // Parse the connection string.
          var items = connStr.Split(new string[] { ";" }, StringSplitOptions.RemoveEmptyEntries);
          var host = "";            // broker address(es), comma separated
          var user = "";            // user name
          var pwd = "";             // password
          var exchange = "";        // exchange
          var queue = "";           // queue
          var bindtype = "direct";  // exchange type
          var mutex = "";           // multi mode: special field designed for multiple consumers
          var routing = "";         // routing key
          var ttl = 0;              // message auto-expiry time
          var declare = "false";    // whether to declare exchange/queue here (manual creation is recommended)
          foreach (var item in items)
          {
              // Split at the first '=' only, so values such as passwords may contain '='.
              var kvPair = item.Split(new[] { '=' }, 2);
              if (kvPair.Length != 2 || kvPair[0].Length == 0 || kvPair[1].Length == 0)
                  throw new ArgumentOutOfRangeException("cfgName", "Rabbit的连接配置错误!");

              // Invariant casing: culture-sensitive lowering (e.g. Turkish 'I') would break key matching.
              switch (kvPair[0].ToLowerInvariant())
              {
                  case "host":
                      host = kvPair[1];
                      break;
                  case "user":
                      user = kvPair[1];
                      break;
                  case "pwd":
                      pwd = kvPair[1];
                      break;
                  case "exchange":
                      exchange = kvPair[1];
                      break;
                  case "queue":
                      queue = kvPair[1];
                      break;
                  case "bindtype":
                      bindtype = kvPair[1];
                      break;
                  case "mutex":
                      mutex = kvPair[1];
                      break;
                  case "routing":
                      routing = kvPair[1];
                      break;
                  case "ttl":
                      ttl = int.Parse(kvPair[1]);
                      break;
                  case "declare":
                      declare = kvPair[1];
                      break;
              }
          }

          // Distributed deployments that consume the same messages use one queue per host,
          // all bound to the exchange; this needs a fanout exchange or an explicit routing key.
          if (mutex == "true")
          {
              var queueSuffix = 1;
              var ip2mq = JsonConvert.DeserializeObject<Dictionary<string, Dictionary<string, int>>>(ConfigContainer.IP2MQQueueHash);
              Dictionary<string, int> perHost;
              int mapped;
              if (ip2mq != null && ip2mq.TryGetValue(cfgName, out perHost) && perHost.TryGetValue(DnsTool.GetNativeIP(), out mapped))
              {
                  queueSuffix = mapped;
              }
              queue = queue + "_" + queueSuffix;
          }

          // Build the connection.
          // AutomaticRecoveryEnabled re-establishes the connection after a drop, which avoids
          // "Already closed" errors after the broker restarts.
          var factory = new ConnectionFactory()
          {
              UserName = user,
              Password = pwd,
              RequestedHeartbeat = 30, // heartbeat
              AutomaticRecoveryEnabled = true, // automatic reconnect
              TopologyRecoveryEnabled = true, // topology recovery
              NetworkRecoveryInterval = TimeSpan.FromSeconds(10)
          };

          var hosts = host.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
          mc.Connection = factory.CreateConnection(hosts);
          mc.Channel = mc.Connection.CreateModel();
          var properties = mc.Channel.CreateBasicProperties();
          properties.DeliveryMode = 2;
          mc.BasicProperties = properties;
          mc.Exchange = exchange;
          mc.Queue = queue;
          // The routing key defaults to the queue name unless one is configured.
          mc.Routing = string.IsNullOrEmpty(routing) ? queue : routing;

          // Reconnect when the connection shuts down.
          // NOTE(review): this handler appears to run on every shutdown, including one triggered
          // by Close(), and in addition to the automatic recovery enabled above; the replacement
          // connection does not get this handler attached. Confirm that this is intended.
          mc.Connection.ConnectionShutdown += (_sender, _e) =>
          {
              mc.Connection = factory.CreateConnection(hosts);
              mc.Channel = mc.Connection.CreateModel();
              properties.DeliveryMode = 2;
              mc.BasicProperties = properties;
              mc.Exchange = exchange;
              mc.Queue = queue;
          };

          // Optional declaration of exchange and queue.
          if (declare == "true")
          {
              var hasExchange = !string.IsNullOrEmpty(exchange);
              if (hasExchange)
              {
                  mc.Channel.ExchangeDeclare(exchange, bindtype, true);
              }

              if (!string.IsNullOrEmpty(queue))
              {
                  Dictionary<string, object> arguments = null;
                  if (ttl > 0)
                  {
                      // RabbitMQ supports a TTL on messages and on queues; expired messages are
                      // removed from the queue, though not necessarily immediately.
                      // The unit is milliseconds. An alternative is to set "expiration"
                      // (also milliseconds) on each message when publishing.
                      arguments = new Dictionary<string, object> { { "x-message-ttl", ttl } };
                  }
                  mc.Channel.QueueDeclare(queue, true, false, false, arguments);

                  // The default (nameless) exchange cannot be bound explicitly, so bind only
                  // when an exchange is configured.
                  if (hasExchange)
                  {
                      mc.Channel.QueueBind(queue, exchange, mc.Routing, null);
                  }
              }
          }
          return mc;
      }

      /// <summary>
      /// Fetches a single message from the configured queue.
      /// </summary>
      /// <param name="cfgName">Configuration name.</param>
      /// <param name="isAck">Passed to BasicGet as its auto-acknowledge flag.</param>
      /// <returns>The message body decoded as UTF-8, or null when no message or no client is available.</returns>
      internal static string SubMsg(string cfgName, bool isAck = true)
      {
          try
          {
              var mc = GetMQClient(cfgName);
              if (mc == null)
                  return null; // creation failed; GetMQClient has already logged the cause

              var gr = mc.Channel.BasicGet(mc.Queue, isAck);
              if (gr == null || gr.Body == null)
                  return null;

              var msg = Encoding.UTF8.GetString(gr.Body);
              LogTool.WriteSystemLog(cfgName + " RabbitMsg:", msg + "," + gr.DeliveryTag);

              return msg;
          }
          catch (RabbitMQClientException ex)
          {
              LogTool.WriteError("RabbitMQHelper.SubMsg", cfgName, ex);
              // Drop the client so that the next call builds a fresh one.
              lock (InitLock)
              {
                  RabbitMQClients.Remove(cfgName);
              }
          }
          return null;
      }

      /// <summary>
      /// Starts a consumer on the configured queue. Each message is acknowledged after
      /// <paramref name="receivedHandler"/> returns.
      /// </summary>
      /// <param name="cfgName">Configuration name.</param>
      /// <param name="receivedHandler">Callback invoked with each message body decoded as UTF-8.</param>
      /// <remarks>
      /// Note 1: do not call BasicConsume in a loop; every call creates a new consumer that is never destroyed.
      /// Note 2: do not use this method in multi (mutex) mode together with a routing key; it has a
      /// duplicate-consumption bug. Use SubMsg instead.
      /// </remarks>
      internal static void ConsumeQueue(string cfgName, Action<string> receivedHandler)
      {
          try
          {
              // Sharing one channel between consuming threads is not recommended.
              // Do not switch the consumer to auto-ack: the broker would then confirm messages as soon
              // as they are sent, regardless of how fast the consumer processes them, which can exhaust
              // the consumer's memory. Manual ack is therefore combined with a QoS prefetch limit.
              var mc = GetMQClient(cfgName);
              if (mc.Consumer == null || !mc.Consumer.IsRunning)
              {
                  LogTool.WriteSystemLog("开启监听:", cfgName);
                  mc.Channel.BasicQos(0, 2, false);
                  mc.Consumer = new EventingBasicConsumer(mc.Channel);
                  mc.Consumer.Received += (model, args) =>
                  {
                      if (args != null && args.Body != null)
                      {
                          var msg = Encoding.UTF8.GetString(args.Body);
                          LogTool.WriteSystemLog(cfgName + " RabbitMsg:", msg + "," + args.DeliveryTag);
                          receivedHandler(msg);
                          mc.Channel.BasicAck(args.DeliveryTag, false);
                      }
                  };
                  mc.Channel.BasicConsume(mc.Queue, false, mc.Consumer);
              }
          }
          catch (Exception ex)
          {
              LogTool.WriteError("RabbitMQHelper.ConsumeQueue", cfgName, ex);
          }
      }

      /// <summary>
      /// Publishes a single message to the configured exchange and routing key.
      /// </summary>
      /// <param name="cfgName">Configuration name.</param>
      /// <param name="msg">Message body; sent as UTF-8.</param>
      internal static void PubMsg(string cfgName, string msg)
      {
          try
          {
              var mc = GetMQClient(cfgName);
              mc.Channel.BasicPublish(mc.Exchange, mc.Routing, mc.BasicProperties, Encoding.UTF8.GetBytes(msg));
          }
          catch (Exception ex)
          {
              LogTool.WriteError("RabbitMQHelper.PubMsg", cfgName + "," + msg, ex);
          }
      }

      /// <summary>
      /// Publishes a single message with a per-message expiration.
      /// </summary>
      /// <param name="cfgName">Configuration name.</param>
      /// <param name="msg">Message body; sent as UTF-8.</param>
      /// <param name="ttl">Expiration time in milliseconds.</param>
      internal static void PubMsg(string cfgName, string msg, int ttl)
      {
          try
          {
              var mc = GetMQClient(cfgName);
              // Use a fresh property set for this message. Writing Expiration onto the shared
              // mc.BasicProperties would make every later publish inherit this TTL.
              var messageProperties = mc.Channel.CreateBasicProperties();
              messageProperties.DeliveryMode = mc.BasicProperties.DeliveryMode;
              messageProperties.Expiration = ttl.ToString(System.Globalization.CultureInfo.InvariantCulture);
              mc.Channel.BasicPublish(mc.Exchange, mc.Routing, messageProperties, Encoding.UTF8.GetBytes(msg));
          }
          catch (Exception ex)
          {
              LogTool.WriteError("RabbitMQHelper.PubMsg", cfgName + "," + msg, ex);
          }
      }

      /// <summary>
      /// Acknowledges a delivered message.
      /// </summary>
      /// <param name="cfgName">Configuration name.</param>
      /// <param name="deliveryTag">Delivery tag of the message to acknowledge.</param>
      internal static void Ack(string cfgName, ulong deliveryTag)
      {
          try
          {
              var mc = GetMQClient(cfgName);
              // With multiple = true, all deliveries with a smaller tag would be acknowledged at once.
              mc.Channel.BasicAck(deliveryTag, false);
          }
          catch (Exception ex)
          {
              LogTool.WriteError("RabbitMQHelper.Ack", cfgName + "," + deliveryTag, ex);
          }
      }


      /// <summary>
      /// Removes all messages stored in the configured queue.
      /// </summary>
      /// <param name="cfgName">Configuration name.</param>
      internal static void PurgeQueue(string cfgName)
      {
          try
          {
              var mc = GetMQClient(cfgName);
              mc.Channel.QueuePurge(mc.Queue);
          }
          catch (Exception ex)
          {
              LogTool.WriteError("RabbitMQHelper.PurgeQueue", cfgName, ex);
          }
      }


      /// <summary>
      /// Closes the cached client of a configuration and removes it from the cache.
      /// </summary>
      /// <param name="cfgName">Configuration name to close.</param>
      internal static void Close(string cfgName)
      {
          try
          {
              RabbitMQClient client;
              lock (InitLock)
              {
                  if (!RabbitMQClients.TryGetValue(cfgName, out client))
                      return;

                  // Remove first so that no caller can obtain a client that is being closed.
                  RabbitMQClients.Remove(cfgName);
              }

              try
              {
                  client.Channel.Close();
              }
              finally
              {
                  // Close the connection even when closing the channel throws.
                  client.Connection.Close();
              }
          }
          catch (Exception ex)
          {
              LogTool.WriteError("RabbitMQHelper.Close", cfgName, ex);
          }
      }


      /// <summary>
      /// Closes every cached client.
      /// </summary>
      internal static void CloseAll()
      {
          // Snapshot the keys: Close removes entries, and a dictionary must not be
          // modified while it is being enumerated.
          List<string> cfgNames;
          lock (InitLock)
          {
              cfgNames = new List<string>(RabbitMQClients.Keys);
          }

          foreach (var rKey in cfgNames)
          {
              Close(rKey);
          }
      }
  }


  /// <summary>
  /// Holds the connection, channel, consumer and routing settings that make up one RabbitMQ client.
  /// </summary>
  internal class RabbitMQClient
  {
      // ---- live broker objects ----

      /// <summary>
      /// The RabbitMQ connection.
      /// </summary>
      internal IConnection Connection { get; set; }

      /// <summary>
      /// The RabbitMQ channel (session).
      /// </summary>
      internal IModel Channel { get; set; }

      /// <summary>
      /// The consumer.
      /// </summary>
      internal EventingBasicConsumer Consumer { get; set; }

      /// <summary>
      /// The basic (message) properties.
      /// </summary>
      internal IBasicProperties BasicProperties { get; set; }

      // ---- addressing ----

      /// <summary>
      /// Exchange name.
      /// </summary>
      internal string Exchange { get; set; }

      /// <summary>
      /// Queue name.
      /// </summary>
      internal string Queue { get; set; }

      /// <summary>
      /// Routing key.
      /// </summary>
      internal string Routing { get; set; }
  }

推荐阅读