API Preview tool | The Dojo Toolkit

dojox/html/format.js

Provides:
- dojox.html.format
dojox.html.format.prettyPrint
- type
```
Function
```
- parameters:
  - html: (typeof String)
```
The string of HTML to try and generate a 'pretty' formatting.
```
  - indentBy: (typeof Integer)
```
Optional input for the number of spaces to use when indenting.
If not defined, zero, negative, or 10 or greater, will just use tab
as the indent.
```
  - maxLineLength: (typeof Integer)
```
Optional input for the number of characters a text line should use in
the document, including the indent if possible.
```
  - map: (typeof Array)
```
Optional array of entity mapping characters to use when processing the
HTML Text content.  By default it uses the default set used by the
dojox.html.entities.encode function.
```
  - xhtml: (typeof boolean)
```
Optional parameter that declares that the returned HTML should try to be 'xhtml' compatible.
This means normally unclosed tags are terminated with /> instead of >.  Example: <hr> -> <hr />
```
- source: [view]
  // Shared state used by every helper defined below.
  // Output fragments; joined into the final string at the very end.
  var content = [];
  // Text (and inline tags) collected but not yet flushed into content.
  var textContent = "";
  // Current nesting level for block-level output.
  var indentDepth = 0;
  // Parallel stacks with one entry per opened element: the name to use
  // for the closing tag (or false when none is written), and whether
  // the element is an inline formatting element.
  var closeTags = [];
  var inlineStyle = [];
  // Text for one level of indent; a tab unless indentBy overrides it.
  var iTxt = "\t";
  var i;
  
  // Regular expressions used while rebuilding tags; created once per call.
  var rgxp_fixIEAttrs = /[=]([^"']+?)(\s|>)/g;
  var rgxp_styleMatch = /style=("[^"]*"|'[^']*'|\S*)/gi;
  var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi;
  
  // An indentBy between 1 and 9 switches the indent from a tab to that
  // many spaces; any other value keeps the tab.
  if(indentBy && indentBy > 0 && indentBy < 10){
    iTxt = "";
    i = 0;
    while(i < indentBy){
      iTxt += " ";
      i++;
    }
  }
  
  // Parse the input into a detached div so the result can be walked as
  // a DOM tree and re-serialized in 'pretty' form.
  // NOTE(review): the markup is handed to the browser parser as-is;
  // confirm callers only pass HTML they trust.
  var contentDiv = dojo.doc.createElement("div");
  contentDiv.innerHTML = html;
  
  // Entity helpers. Per the original note, they cache on the map, so
  // passing the same map repeatedly does not reprocess it.
  var encode = dojox.html.entities.encode;
  var decode = dojox.html.entities.decode;
  
  /** Define a bunch of formatters to format the output. **/
  var isInlineFormat = function(tag){
    // summary:
    //  Reports whether the given tag name is one of the inline
    //  formatting elements. Such elements are kept inside the text
    //  flow, since breaking or indenting around them can alter text.
    // tag:
    //  The tag name to test; compared exactly (case-sensitive).
    // returns:
    //  true when tag is in the inline list, false otherwise.
    var inlineTags = [
      "a", "b", "strong", "s", "strike", "i", "u", "em",
      "sup", "sub", "span", "font", "big", "cite", "q", "small"
    ];
    var idx;
    for(idx = 0; idx < inlineTags.length; idx++){
      if(inlineTags[idx] === tag){
        return true;
      }
    }
    return false;
  };
  
  // One scratch div, created up front and reused by every outerHTML
  // call, so a new container is not created per node.
  var div = contentDiv.ownerDocument.createElement("div");
  var outerHTML = function(node){
    // summary:
    //  Returns the markup for a node without its children.
    //  A shallow clone is serialized through the scratch div; cloning
    //  without children means no child nodes are looked at, which is
    //  what is wanted here (unlike IE's built-in outerHTML).
    // node:
    //  The DOM node to serialize.
    var shallowCopy = node.cloneNode(false);
    div.appendChild(shallowCopy);
    var markup = div.innerHTML;
    // Empty the scratch div again so the next call starts clean.
    div.innerHTML = "";
    return markup;
  };
  
  var sizeIndent = function(){
    // summary:
    //  Returns the number of characters the indent occupies at the
    //  current indentDepth (one iTxt per level), or 0 when the depth
    //  is not positive.
    if(!(indentDepth > 0)){
      return 0;
    }
    return Math.ceil(indentDepth) * iTxt.length;
  };
  
  var indent = function(){
    // summary:
    //  Writes the indent for the current depth by pushing one iTxt
    //  onto the output for each level of indentDepth.
    var remaining = indentDepth;
    while(remaining > 0){
      content.push(iTxt);
      remaining--;
    }
  };
  var newline = function(){
    // summary:
    //  Ends the current output line by appending a single "\n" to the
    //  shared content array.
    content.push("\n");
  };
  
  var processTextNode = function(n){
    // summary:
    //  Entity-encodes the value of a text node and appends it to the
    //  pending text buffer; nothing is written to the output here.
    // n:
    //  The text node to process.
    var encodedText = encode(n.nodeValue, map);
    textContent = textContent + encodedText;
  };
  
  var formatText = function(txt){
    // summary:
    //  Formats the text collected so far for insertion into the
    //  output: whitespace is normalized, every output line gets the
    //  current indent, and, when maxLineLength is set, the text is
    //  wrapped at spaces.
    // txt:
    //  The text to format.
    // returns:
    //  The formatted text with each line terminated by "\n", or ""
    //  when the text is empty after trimming.
    var i;
  
    // Drop the existing line layout: trim every line and join the
    // pieces with single spaces, since it is re-flowed below anyway.
    var rawLines = txt.split("\n");
    for(i = 0; i < rawLines.length; i++){
      rawLines[i] = dojo.trim(rawLines[i]);
    }
    txt = dojo.trim(rawLines.join(" "));
    if(txt === ""){
      return "";
    }
  
    // Build the indent prefix for the current depth once.
    var prefix = "";
    for(i = 0; i < indentDepth; i++){
      prefix += iTxt;
    }
  
    if(!(maxLineLength && maxLineLength > 0)){
      // No wrapping requested: a single indented line.
      return prefix + txt + "\n";
    }
  
    // Room left for text once the indent is counted. If the indent
    // alone already reaches the limit, fall back to the full limit.
    var maxLine = maxLineLength;
    if(maxLineLength > prefix.length){
      maxLine -= prefix.length;
    }
  
    var lines = [];
    while(txt){
      // Compare against the indent-adjusted width so the last chunk
      // obeys the same limit as the chunks split off before it.
      if(txt.length <= maxLine){
        lines.push(prefix + txt + "\n");
        break;
      }
      // Look backwards from the limit for a space to split at.
      for(i = maxLine; (i > 0 && txt.charAt(i) !== " "); i--){
        // Do nothing, just scanning.
      }
      if(!i){
        // No space before the limit, so take the first one after it
        // (or the end of the text).
        for(i = maxLine; (i < txt.length && txt.charAt(i) !== " "); i++){
          // Do nothing, just scanning.
        }
      }
      var line = dojo.trim(txt.substring(0, i));
      // Continue with whatever follows the split point.
      txt = dojo.trim(txt.substring((i === txt.length) ? txt.length : i + 1, txt.length));
      if(line){
        line = prefix + line + "\n";
      }
      lines.push(line);
    }
    return lines.join("");
  };
  
  var processScriptText = function(txt){
    // summary:
    //  Turns the HTML entities that can show up in script text back
    //  into their literal characters.
    // txt:
    //  The script text to clean up; falsy values are returned as-is.
    // returns:
    //  The text with &quot;, &gt;, &lt; and &amp; decoded.
    if(txt){
      txt = txt.replace(/&quot;/gi, "\"");
      txt = txt.replace(/&gt;/gi, ">");
      txt = txt.replace(/&lt;/gi, "<");
      // Decode ampersands last so that "&amp;lt;" becomes "&lt;"
      // rather than being decoded twice into "<".
      txt = txt.replace(/&amp;/gi, "&");
    }
    return txt;
  };
  
  var formatScript = function(txt){
    // summary:
    //  Applies rudimentary formatting to script text: each non-blank
    //  line is trimmed and re-indented according to the running count
    //  of curly braces, on top of the current document indent.
    //  Not perfect, but it gives the code some organization.
    // txt:
    //  The script text to format; falsy values are returned as-is.
    if(!txt){
      return txt;
    }
    var sourceLines = processScriptText(txt).split("\n");
    var formatted = [];
    // Running brace nesting, carried from one line to the next.
    var braceDepth = 0;
    var lineNo;
    var pos;
    var level;
    for(lineNo = 0; lineNo < sourceLines.length; lineNo++){
      var line = dojo.trim(sourceLines[lineNo]);
      if(!line){
        // Blank lines are dropped. The pieces of a split on "\n"
        // never contain a newline, so there is nothing to keep.
        continue;
      }
      // A line is written at the depth in effect when it starts,
      // unless it closes a brace, in which case it backs up first.
      var lineDepth = braceDepth;
      for(pos = 0; pos < line.length; pos++){
        var ch = line.charAt(pos);
        if(ch === "{"){
          braceDepth++;
        }else if(ch === "}"){
          braceDepth--;
          lineDepth = braceDepth;
        }
      }
      var prefix = "";
      for(level = 0; level < indentDepth + lineDepth; level++){
        prefix += iTxt;
      }
      formatted.push(prefix + line + "\n");
    }
    // Reassemble the script text, hopefully reasonably formatted.
    return formatted.join("");
  };
  
  var openTag = function(node){
    // summary:
    //  Writes the opening tag for an element. The tag is rebuilt from
    //  the node's own markup with quoted attribute values, a
    //  normalized style attribute and attributes in sorted order.
    //  Inline formatting elements are appended to the pending text;
    //  all other elements flush the pending text, are written on
    //  their own indented line, and increase the indent depth.
    //  One entry is pushed onto closeTags and inlineStyle for the
    //  matching closeTag call to consume.
    // node:
    //  The element node being opened.
    var name = node.nodeName.toLowerCase();
    // Generate the outer node content (tag with attrs)
    var nText = dojo.trim(outerHTML(node));
    var tag = nText.substring(0, nText.indexOf(">") + 1);
  
    // Also thanks to IE, we need to check for quotes around
    // attributes and insert if missing.
    tag = tag.replace(rgxp_fixIEAttrs,'="$1"$2');
  
    // And lastly, thanks IE for changing style casing and end
    // semi-colon and webkit adds spaces, so lets clean it up by
    // sorting, etc, while we're at it.
    tag = tag.replace(rgxp_styleMatch, function(match){
      // sL is the six character "style=" prefix exactly as matched.
      var sL = match.substring(0,6);
      var style = match.substring(6, match.length);
      // The quote character that wrapped the value; reused on output.
      var closure = style.charAt(0);
      style = dojo.trim(style.substring(1,style.length -1));
      style = style.split(";");
      var trimmedStyles = [];
      dojo.forEach(style, function(s){
        s = dojo.trim(s);
        if(s){
          // Lower case the style name, leave the value alone. Mainly a fixup for IE.
          s = s.substring(0, s.indexOf(":")).toLowerCase() + s.substring(s.indexOf(":"), s.length);
          trimmedStyles.push(s);
        }
      });
      trimmedStyles = trimmedStyles.sort();
  
      // Reassemble and return the styles in sorted order.
      style = trimmedStyles.join("; ");
      var ts = dojo.trim(style);
      if(!ts || ts === ";"){
        // Just remove any style attrs that are empty.
        return "";
      }else{
        style += ";";
        return sL + closure + style + closure;
      }
    });
  
    // Try and sort the attributes while we're at it.
    var attrs = [];
    tag = tag.replace(rgxp_attrsMatch, function(attr){
      attrs.push(dojo.trim(attr));
      return "";
    });
    attrs = attrs.sort();
  
    // Reassemble the tag with sorted attributes!
    tag = "<" + name;
    if(attrs.length){
      tag += " " + attrs.join(" ");
    }
  
    // Determine closure status. The serialized node contains "</"
    // only when the element has a closing tag. If it does not and
    // xhtml is requested, close the tag properly as needed.
    if(nText.indexOf("</") !== -1){
      closeTags.push(name);
      tag += ">";
    }else{
      if(xhtml){
        tag += " />";
      }else{
        tag += ">";
      }
      closeTags.push(false);
    }
  
    var inline = isInlineFormat(name);
    inlineStyle.push(inline);
    if(textContent && !inline){
      // Process any text content we have that occurred
      // before the open tag of a non-inline.
      content.push(formatText(textContent));
      textContent = "";
    }
  
    if(!inline){
      // Block-level: the tag gets its own line and what follows is
      // nested one level deeper.
      indent();
      content.push(tag);
      newline();
      indentDepth++;
    }else{
      // Inline: keep the tag inside the text flow.
      textContent += tag;
    }
  };
  
  var closeTag = function(){
    // summary:
    //  Closes the most recently opened element, using the entries
    //  openTag pushed onto inlineStyle and closeTags. For elements
    //  with a closing tag, the tag is written on its own line one
    //  level shallower, or appended to the pending text if inline.
    //  Elements without a closing tag only undo the indent.
    var inline = inlineStyle.pop();
    if(textContent && !inline){
      // Process any text content we have that occurred
      // before the close tag.
      content.push(formatText(textContent));
      textContent = "";
    }
    var ct = closeTags.pop();
    if(ct){
      // ct holds the tag name; turn it into the closing tag itself.
      ct = "</" + ct + ">";
      if(!inline){
        indentDepth--;
        indent();
        content.push(ct);
        newline();
      }else{
        textContent += ct;
      }
    }else{
      // Nothing to write, but openTag increased the depth for it.
      indentDepth--;
    }
  };
  
  var processCommentNode = function(n){
    // summary:
    //  Writes a comment node to the output on its own indented line,
    //  wrapped in the HTML comment delimiters.
    // n:
    //  The comment node to process.
  
    //Make sure contents aren't double-encoded.
    var commentText = decode(n.nodeValue, map);
    indent();
    content.push("<!--" + commentText + "-->");
    newline();
  };
  
  var processNode = function(node) {
    // summary:
    //  Entry point for walking the DOM: visits each child of node in
    //  order and dispatches on its type. Elements are opened,
    //  processed and closed; text and CDATA nodes are added to the
    //  pending text; comment nodes are written out. Other node types
    //  are ignored.
    // node:
    //  The node whose children should be processed.
    var children = node.childNodes;
    if(children){
      var i;
      for(i = 0; i < children.length; i++){
        var n = children[i];
        if(n.nodeType === 1){
          var tg = dojo.trim(n.tagName.toLowerCase());
          if(dojo.isIE && n.parentNode != node){
            // IE is broken. DOMs are supposed to be a tree.
            // But in the case of malformed HTML, IE generates a graph
            // meaning one node ends up with multiple references
            // (multiple parents). This is totally wrong and invalid, but
            // such is what it is. We have to keep track and check for
            // this because otherwise the source output HTML will have dups.
            continue;
          }
          if(tg && tg.charAt(0) === "/"){
            // IE oddity. Malformed HTML can produce elements whose
            // tag name starts with "/", because it treats a
            // mismatched closure as a new start tag. So, remove them.
            continue;
          }else{
            //Process non-dup, seemingly wellformed elements!
            openTag(n);
            if(tg === "script"){
              // Script content is formatted as code, not walked.
              content.push(formatScript(n.innerHTML));
            }else if(tg === "pre"){
              // Preformatted content is copied through unchanged.
              var preTxt = n.innerHTML;
              if(dojo.isMoz){
                //Mozilla screws this up, so fix it up.
                // Each call replaces the first occurrence only.
                preTxt = preTxt.replace("<br>", "\n");
                preTxt = preTxt.replace("<pre>", "");
                preTxt = preTxt.replace("</pre>", "");
              }
              // Add ending newline, if needed.
              if(preTxt.charAt(preTxt.length - 1) !== "\n"){
                preTxt += "\n";
              }
              content.push(preTxt);
            }else{
              processNode(n);
            }
            closeTag();
          }
        }else if(n.nodeType === 3 || n.nodeType === 4){
          processTextNode(n);
        }else if(n.nodeType === 8){
          processCommentNode(n);
        }
      }
    }
  };
  
  // Everything is set up: walk the parsed input, which fills content
  // (and may leave unflushed text behind in textContent).
  processNode(contentDiv);
  if(textContent){
    // Text after the last block-level tag has not been flushed yet,
    // so insert any trailing text here. See: #10854
    content.push(formatText(textContent));
    textContent = "";
  }
  // Join the collected fragments into the final formatted markup.
  return content.join(""); //String
- summary
```
Function for providing a 'pretty print' version of HTML content from
the provided string.  It's not perfect by any means, but it does
a 'reasonable job'.
```
- returns
```
String
```
dojox.html.format
- type
```
Object
```
- summary
dojox.html
- type
```
Object
```
- summary
dojox
- type
```
Object
```
- summary

dojox/html/format.js

Provides:

dojox.html.format.prettyPrint

dojox.html.format

dojox.html

dojox