Hello,
We are currently considering implementing FCKeditor into our Plone site. We will be doing a lot of conversion from Word files.
When we convert, we noticed that <div> tags are created instead of <p>. Here is an example of some of that code:
<div><b><u>Purpose</u></b></div>
<div> </div>
<div>....</div>
As you can see there should technically be no reason for using <div>s in this situation (as well as the <div> </div>). Since we are mandated to use code that is as standard and accessible as possible, would the following code not be preferable?
<p><b><u>Purpose</u></b></p>
<p>....</p>
Thank you for your help!
We are currently considering implementing FCKeditor into our Plone site. We will be doing a lot of conversion from Word files.
When we convert, we noticed that <div> tags are created instead of <p>. Here is an example of some of that code:
<div><b><u>Purpose</u></b></div>
<div> </div>
<div>....</div>
As you can see there should technically be no reason for using <div>s in this situation (as well as the <div> </div>). Since we are mandated to use code that is as standard and accessible as possible, would the following code not be preferable?
<p><b><u>Purpose</u></b></p>
<p>....</p>
Thank you for your help!
RE: Why using <div>?
RE: Why using <div>?
<code>
/**
 * Cleans HTML pasted from Microsoft Word by running it through a fixed
 * sequence of regular-expression replacements.
 *
 * This is a customised variant: font faces, <font> tags, class and lang
 * attributes are always removed, <p> elements are kept (not converted to
 * <div>), headings are left untouched, and inline styles are stripped from
 * every tag except <span>.
 *
 * The order of the replacements matters: several later steps rely on
 * attributes or wrappers having been emptied by earlier ones.
 *
 * @param {string} html - The HTML fragment to clean.
 * @returns {string} The cleaned HTML fragment.
 */
function CleanWord( html )
{
  // The "remove font" / "remove styles" checkboxes are intentionally not read
  // here: both options are always applied in this customised version, and
  // reading unused DOM elements would only make the function throw when they
  // are absent.

  // Drop empty <o:p> markers; collapse non-empty ones to a single space.
  html = html.replace( /<o:p>\s*<\/o:p>/g, "" ) ;
  html = html.replace( /<o:p>.*?<\/o:p>/g, " " ) ;

  // Remove mso-xxx styles.
  html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, "" ) ;

  // Remove margin / indent declarations that carry Word's default values.
  // Each pair handles "followed by ';'" and "last declaration before the
  // closing quote" (the quote is put back in the second form).
  html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, "" ) ;
  html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ;
  html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, "" ) ;
  html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ;

  // These are only removed when they are the last declaration in the attribute.
  html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ;
  html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ;
  html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ;

  html = html.replace( /\s*tab-stops:[^;"]*;?/gi, "" ) ;
  html = html.replace( /\s*tab-stops:[^"]*/gi, "" ) ;

  // Custom: always remove font faces (quoted and unquoted face attributes,
  // plus FONT-FAMILY style declarations).
  html = html.replace( /\s*face="[^"]*"/gi, "" ) ;
  html = html.replace( /\s*face=[^ >]*/gi, "" ) ;
  html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, "" ) ;

  // Remove class attributes.
  html = html.replace( /<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3" ) ;

  // Remove style attributes that the steps above left empty.
  html = html.replace( /\s*style="\s*"/gi, '' ) ;

  // Replace spans holding only a space with a plain space; drop empty spans.
  html = html.replace( /<SPAN\s*[^>]*>\s* \s*<\/SPAN>/gi, ' ' ) ;
  html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ;

  // Remove lang attributes.
  html = html.replace( /<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3" ) ;

  // Unwrap <span> / <font> elements that no longer carry any attribute.
  html = html.replace( /<SPAN\s*>(.*?)<\/SPAN>/gi, '$1' ) ;
  html = html.replace( /<FONT\s*>(.*?)<\/FONT>/gi, '$1' ) ;

  // Remove XML elements and declarations.
  html = html.replace( /<\\?\?xml[^>]*>/gi, "" ) ;

  // Remove tags with XML namespace prefixes, e.g. <o:p></o:p>.
  html = html.replace( /<\/?\w+:[^>]*>/gi, "" ) ;

  // Remove empty headings. Custom: non-empty <h1>-<h6> are kept as they are.
  html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;

  // Replace <U>, <I> or <STRIKE> elements holding a single space with a space.
  html = html.replace( /<(U|I|STRIKE)> <\/\1>/g, ' ' ) ;

  // Remove empty tags. Three passes so that wrappers which become empty after
  // their empty child was removed are cleaned up too (up to three levels).
  for ( var pass = 0 ; pass < 3 ; pass++ )
    html = html.replace( /<([^\s>]+)[^>]*>\s*<\/\1>/g, '' ) ;

  // Custom: <p> is not converted to <div>. Adjacent paragraphs separated by
  // CRLF are joined with a line break instead.
  html = html.replace( /<\/p>\r\n<p>/gi, '<br />\r\n' ) ;

  // Custom: try to rebuild paragraphs from doubled line breaks.
  html = html.replace( /<br \/>\r\n <br \/>\r\n/gi, "</p><p>" ) ;

  // Custom: remove every font tag.
  html = html.replace( /<font(.*?)>/gi, "" ) ;
  html = html.replace( /<\/font>/gi, "" ) ;

  // Custom: remove the style attribute from every tag except <span>.
  // A negative lookahead is required to exclude the tag name "span"; a
  // character class such as [^span] only excludes the individual letters
  // s, p, a and n, which made the strip skip tags like <table>, or any tag
  // with an attribute containing one of those letters before style=.
  // [^>]*? keeps the match inside a single tag.
  html = html.replace( /<(?!span\b)(\w[^>]*?)\s+style="[^"]*"/gi, "<$1" ) ;

  return html ;
}
</code>