存档

文章标签 ‘WORD’

JavaScript清理Word生成的HTML冗余代码

2007年5月26日 没有评论

<script language="javascript">
<!--
/**
 * Strips markup that Microsoft Word adds when a document is saved or pasted
 * as HTML: SPAN wrappers, class/style/lang attributes, XML declarations,
 * namespaced tags such as <o:p>, and non-breaking-space entities. Paragraph
 * elements are finally rewritten as DIV elements.
 *
 * The cleanup is purely regex based; it does not parse the HTML.
 *
 * @param {string} html - HTML fragment to clean.
 * @returns {string} The cleaned HTML; an empty string for null/undefined input.
 */
function cleanWordHtml(html) {
    // Nothing to clean; avoid calling .replace on null/undefined.
    if (html == null) {
        return "";
    }
    html = String(html);

    // Remove all SPAN tags (opening and closing), keeping their content.
    html = html.replace(/<\/?SPAN[^>]*>/gi, "");
    // Remove Class attributes (unquoted or quoted values without spaces).
    html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3");
    // Remove Style attributes (double-quoted values only).
    html = html.replace(/<(\w[^>]*) style="([^"]*)"([^>]*)/gi, "<$1$3");
    // Remove Lang attributes.
    html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");
    // Remove XML elements and declarations, e.g. <xml ...> and <?xml ...>.
    html = html.replace(/<\??xml[^>]*>/gi, "");
    // Remove tags with XML namespace prefixes: <o:p></o:p>
    html = html.replace(/<\/?\w+:[^>]*>/gi, "");
    // Replace every &nbsp; entity with a plain space (global, not just the first).
    html = html.replace(/&nbsp;/g, " ");
    // Transform <P> to <DIV>, preserving any remaining attributes.
    // - (\s[^>]*)? requires whitespace or ">" right after "P", so tags such as
    //   PRE or PARAM are left untouched.
    // - [\s\S]*? is used instead of .*? so paragraphs spanning several lines
    //   are matched as well.
    html = html.replace(/<P(\s[^>]*)?>([\s\S]*?)<\/P>/gi, "<div$1>$2</div>");

    return html;
}
//-->
</script>

分类: 网站建设, 转载 标签: , ,