使用正则表达式彻底去除 HTML 标签（Java 实现）

    技术2022-05-11  67

     public static String RMHTML(String Htmlstring)

     {

    /**

    转载请注明:

    PowerBy:Lulu

    Www.HotCity.Cn

    */

    // 删除脚本

     Htmlstring = RegexPattern("<//s*?script[^>]*>[//s//S]*?<//s*?///s*?script//s*?>","",Htmlstring);

    // 删除HTML

     Htmlstring = RegexPattern("<([^>]*)>", "",Htmlstring);

     Htmlstring = RegexPattern( "([/r/n])[//s]+", "",Htmlstring);

     Htmlstring = RegexPattern( "-->", "",Htmlstring);

     Htmlstring = RegexPattern( "<!--.*", "",Htmlstring);

     Htmlstring = RegexPattern( "&(quot|#34);", "/"",Htmlstring);

     Htmlstring = RegexPattern( "&(amp|#38);", "&",Htmlstring);

     Htmlstring = RegexPattern( "&(lt|#60);", "<",Htmlstring);

     Htmlstring = RegexPattern( "&(gt|#62);", ">",Htmlstring);

     Htmlstring = RegexPattern( "&(nbsp|#160);", " ",Htmlstring);

     Htmlstring = RegexPattern( "&(iexcl|#161);", "//xa1",Htmlstring);

     Htmlstring = RegexPattern( "&(cent|#162);", "//xa2",Htmlstring);

     Htmlstring = RegexPattern( "&(pound|#163);", "//xa3",Htmlstring);

     Htmlstring = RegexPattern( "&(copy|#169);", "//xa9",Htmlstring);

     Htmlstring = RegexPattern( "&#(//d+);", "",Htmlstring);

     

     Htmlstring = RegexPattern("<", "",Htmlstring);

     Htmlstring = RegexPattern(">", "",Htmlstring);

     //Htmlstring.replace("/r/n", "",Htmlstring);

     

     return Htmlstring;

     }   public static String RegexPattern(String pattern,String str,String content){

           if(pattern!=null && !pattern.equals("")){

               Pattern p = Pattern.compile(pattern,2); //参数2表示大小写不区分

               Matcher m = p.matcher(content);           content=m.replaceAll(str); 

           }       return content;          } 


    最新回复(0)