【图】提取HTML代码中文字的C#函数

/// <summary>
/// Removes HTML markup from a string and returns the remaining plain text.
/// </summary>
/// <remarks>
/// Processing order: script blocks, comments, tags, line-break whitespace,
/// character entities, then a final clean-up pass. Entities are decoded in a
/// single pass, so text such as "&amp;amp;lt;" becomes "&amp;lt;" and is not
/// decoded a second time. Because the final pass removes every remaining
/// '&lt;' and '&gt;', the result never contains angle brackets (including
/// ones produced by decoding "&amp;lt;" / "&amp;gt;").
/// </remarks>
/// <param name="strHtml">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with markup removed; an empty string when
/// <paramref name="strHtml"/> is null or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    const RegexOptions options = RegexOptions.IgnoreCase;
    string strOutput = strHtml;

    // Script blocks, including their content. [\s\S] is used instead of '.'
    // so that scripts spanning several lines are removed as well.
    strOutput = Regex.Replace(strOutput, @"<script[^>]*?>[\s\S]*?</script\s*>", "", options);

    // Comments, including any markup nested inside them.
    strOutput = Regex.Replace(strOutput, @"<!--[\s\S]*?-->", "", options);

    // Opening, closing and declaration tags. Quoted attribute values are
    // consumed as a unit so a '>' inside a value does not end the tag; the
    // atomic group keeps matching linear on malformed input.
    strOutput = Regex.Replace(
        strOutput,
        @"<(?:/\s*)?!?[\w:]+(?>(?:""[^""]*""|'[^']*'|[^'"">])*)>",
        "",
        options);

    // A line break together with the whitespace that follows it.
    strOutput = Regex.Replace(strOutput, @"[\r\n]\s+", "", options);

    // Character entities, decoded in one pass to avoid double decoding.
    strOutput = Regex.Replace(strOutput, @"&(#\d+|\w+);", new MatchEvaluator(DecodeHtmlEntity), options);

    // string.Replace returns a new string; the result must be assigned.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");

    return strOutput;
}

/// <summary>
/// Maps one matched character entity to its replacement text for
/// <see cref="StripHTML"/>.
/// </summary>
/// <param name="match">
/// A match whose first group is the entity name, or '#' followed by decimal digits.
/// </param>
/// <returns>
/// The decoded character for the supported entities, an empty string for any
/// other numeric reference, and the unchanged match text for any other name.
/// </returns>
private static string DecodeHtmlEntity(Match match)
{
    string name = match.Groups[1].Value.ToLowerInvariant();
    switch (name)
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
    }

    // Unlisted numeric references are dropped; unknown named entities are kept.
    return name[0] == '#' ? string.Empty : match.Value;
}