beautify.module

Tracking 6.x-1.x branch
  1. drupal
    1. 6 contributions/beautify/beautify.module

HTML output processor for beautification, compacting and general processing.

This module buffers the HTML output of Drupal and processes it through HTML Tidy, htmLawed or a simple built-in function to beautify, flatten or compact the HTML source code.

It works with the cache system in that it sets the processed HTML to the page cache.

Functions & methods

Name — Description
beautify_exit — Flush the output buffer and send the contents to the router for processing.
beautify_get_options — Returns options available to the active mode.
beautify_htmlawed_process — Processes the HTML document through htmLawed.
beautify_htmltidy_command — Process whatever we are given and return the HTML Tidy response. The output and warnings will be returned as arrays by reference.
beautify_htmltidy_process — Process the buffered output through the HTML Tidy processor.
beautify_htmltidy_run
beautify_htmltidy_test
beautify_init — Start the output buffering.
beautify_menu — Implementation of hook_menu().
beautify_methods — Calculate the available methods.
beautify_parse_html — Parse the HTML document into sections.
beautify_perm — Implementation of hook_perm().
beautify_process — Built-in processing function for compacting or flattening the output.
beautify_router — Route the buffered output to the active processing routine.
beautify_settings — Admin settings form.
beautify_set_cache — Set the processed HTML to the page cache.
beautify_theme — Implementation of hook_theme().
theme_beautify_htmltidy_errors — Theme function for displaying the errors.

File

View source
  1. <?php
  2. /**
  3. * @file
  4. * HTML output processor for beautification, compacting and general processing.
  5. *
  6. * This module buffers the HTML output of Drupal and processes it through
  7. * HTML Tidy, htmLawed or a simple built-in function to beautify, flatten or
  8. * compact the HTML source code.
  9. *
  10. * It works with the cache system in that it sets the processed HTML to the
  11. * page cache.
  12. */
  /**
   * Implementation of hook_menu().
   *
   * Declares a single menu item: the module's settings page, rendered through
   * drupal_get_form() with the beautify_settings form builder.
   *
   * @return
   *   An array of menu items keyed by path.
   */
  function beautify_menu() {
    return array(
      'admin/settings/beautify' => array(
        'title' => 'Beautify',
        'description' => 'Configure settings for the processing of HTML output to the browser.',
        'page callback' => 'drupal_get_form',
        'page arguments' => array('beautify_settings'),
        'access arguments' => array('administer beautify'),
      ),
    );
  }
  /**
   * Implementation of hook_perm().
   *
   * @return
   *   The list of permission names defined by this module.
   */
  function beautify_perm() {
    $permissions = array();
    // Guards the settings page declared in beautify_menu().
    $permissions[] = 'administer beautify';
    // Checked before HTML Tidy messages are appended to the page.
    $permissions[] = 'use beautify debug mode';
    return $permissions;
  }
  /**
   * Implementation of hook_theme().
   *
   * Registers the theme hook that is implemented by
   * theme_beautify_htmltidy_errors(). The hook name must be the function name
   * without its "theme_" prefix, and the parameters have to be listed under the
   * 'arguments' key for the theme registry to pick them up.
   *
   * @return
   *   An array of theme hook definitions keyed by hook name.
   */
  function beautify_theme($existing, $type, $theme, $path) {
    return array(
      'beautify_htmltidy_errors' => array(
        'arguments' => array('errors' => NULL),
      ),
    );
  }
  /**
   * Admin settings form.
   *
   * Builds the module's settings form: the global on/off switch, the processing
   * method and mode, and a fieldset of advanced options that depends on the
   * currently active method (HTML Tidy or htmLawed).
   *
   * The form is handed to system_settings_form(), which stores every value in a
   * variable named after its form key. Each key therefore has to match the
   * variable name read back with variable_get() elsewhere in this module.
   *
   * @return
   *   The settings form array as returned by system_settings_form().
   */
  function beautify_settings() {
    $form = array();

    // Fetch an object with the options including defaults.
    $methods = beautify_methods();

    $form['beautify_enabled'] = array(
      '#type' => 'checkbox',
      '#title' => t('Enable output processing'),
      '#default_value' => variable_get('beautify_enabled', 1),
    );
    $form['beautify_method'] = array(
      '#type' => 'radios',
      '#title' => t('Method'),
      '#options' => $methods->methods,
      '#default_value' => $methods->default,
    );
    $form['beautify_mode'] = array(
      '#type' => 'radios',
      '#title' => t('Mode'),
      '#options' => $methods->options,
      '#default_value' => $methods->mode,
    );

    // Advanced options.
    $form['options'] = array(
      '#type' => 'fieldset',
      '#title' => t('Advanced options'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    switch ($methods->default) {
      // HTML Tidy options
      case 'htmltidy':
        $form['options']['beautify_htmltidy_valid_xhtml'] = array(
          '#type' => 'checkbox',
          '#title' => t('Output valid XHTML'),
          '#default_value' => variable_get('beautify_htmltidy_valid_xhtml', 1),
        );
        $form['options']['beautify_htmltidy_warnings'] = array(
          '#type' => 'checkbox',
          '#title' => t('Append errors'),
          '#default_value' => variable_get('beautify_htmltidy_warnings', 0),
        );
        $form['options']['beautify_htmltidy_doctype'] = array(
          '#type' => 'radios',
          '#title' => t('DOCTYPE'),
          '#description' => t('This option specifies the DOCTYPE declaration generated by Tidy.'),
          '#options' => array(
            'omit' => t('<strong>Omit</strong>: the output won\'t contain a DOCTYPE declaration'),
            'auto' => t('<strong>Auto</strong>: Use an educated guess based upon the contents of the document'),
            'strict' => t('<strong>Strict</strong>: set the DOCTYPE to the strict DTD'),
            'transitional' => t('<strong>Transitional</strong>: set the DOCTYPE to the transitional (loose) DTD'),
            'theme' => t('<strong>Theme</strong>: Use the existing DOCTYPE from the theme (page.tpl.php file)'),
          ),
          // NOTE(review): beautify_htmltidy_command() falls back to 'auto' for
          // this variable while the form preselects 'theme' - confirm which
          // default is intended.
          '#default_value' => variable_get('beautify_htmltidy_doctype', 'theme'),
        );
        $form['options']['msword'] = array(
          '#type' => 'fieldset',
          '#title' => t('Clean MS Word'),
          '#collapsible' => TRUE,
          '#collapsed' => TRUE,
        );
        // The key must be 'beautify_htmltidy_word_bare': that is the variable
        // read below and by the Tidy command builder. The former key
        // ('beautify_htmltidy_msword_bare') saved the value under a name that
        // nothing ever read, so the checkbox had no effect.
        $form['options']['msword']['beautify_htmltidy_word_bare'] = array(
          '#type' => 'checkbox',
          '#title' => t('Convert non-breaking spaces to spaces'),
          '#description' => t('This option specifies if Tidy should strip Microsoft specific HTML from Word 2000 documents, and output spaces rather than non-breaking spaces where they exist in the input.'),
          '#default_value' => variable_get('beautify_htmltidy_word_bare', 0),
        );
        $form['options']['msword']['beautify_htmltidy_word_2000'] = array(
          '#type' => 'checkbox',
          '#title' => t('Strip MS Word 2000 HTML'),
          '#description' => t('This option specifies if Tidy should go to great pains to strip out all the surplus stuff Microsoft Word 2000 inserts when you save Word documents as "Web pages". Doesn\'t handle embedded images or VML. You should consider using Word\'s "Save As: Web Page, Filtered".'),
          '#default_value' => variable_get('beautify_htmltidy_word_2000', 0),
        );
        $form['options']['msword']['beautify_htmltidy_word_attributes'] = array(
          '#type' => 'checkbox',
          '#title' => t('Strip proprietary attributes'),
          '#description' => t('This option specifies if Tidy should strip out proprietary attributes, such as MS data binding attributes.'),
          '#default_value' => variable_get('beautify_htmltidy_word_attributes', 0),
        );
        break;

      // htmLawed options
      case 'htmlawed':
        $form['options']['beautify_htmlawed_valid_xhtml'] = array(
          '#type' => 'checkbox',
          '#title' => t('Output valid XHTML'),
          '#description' => t('Output the most valid XHTML possible.'),
          '#default_value' => variable_get('beautify_htmlawed_valid_xhtml', 1),
        );
        $form['options']['beautify_htmlawed_balance_tags'] = array(
          '#type' => 'checkbox',
          '#title' => t('Balance tags'),
          '#description' => t('Balance tags for well-formedness and proper nesting.'),
          '#default_value' => variable_get('beautify_htmlawed_balance_tags', 1),
        );
        $form['options']['beautify_htmlawed_css_expressions'] = array(
          '#type' => 'checkbox',
          '#title' => t('Allow CSS expressions'),
          '#description' => t('Allow dynamic CSS expression by not removing the expression from CSS property values in style attributes. CSS expressions only work in Internet Explorer 5, 6 and 7.'),
          '#default_value' => variable_get('beautify_htmlawed_css_expressions', 0),
        );
        $form['options']['beautify_htmlawed_keep_bad'] = array(
          '#type' => 'radios',
          '#title' => t('Bad tags'),
          '#description' => t('Neutralize bad tags by converting < and > to entities, or remove them.'),
          '#options' => array(
            0 => t('Remove'),
            1 => t('Neutralize both tags and element content'),
            2 => t('Remove tags but neutralize element content'),
            3 => t('Neutralize both tags and element content but remove if text is invalid in parent element'),
            4 => t('Remove tags but neutralize element content but remove if text is invalid in parent element'),
            5 => t('Neutralize both tags and element content but line-breaks, tabs and spaces are left'),
            6 => t('Remove tags but neutralize element content but line-breaks, tabs and spaces are left'),
          ),
          '#default_value' => variable_get('beautify_htmlawed_keep_bad', 2),
        );
        $form['options']['beautify_htmlawed_strict_tags'] = array(
          '#type' => 'radios',
          '#title' => t('Strict tags'),
          '#description' => t('Transform/remove these non-strict XHTML elements, even if they are allowed by the admin: &lt;applet&gt;, &lt;center&gt;, &lt;dir&gt;, &lt;embed&gt;, &lt;font&gt;, &lt;isindex&gt;, &lt;menu&gt;, &lt;s&gt;, &lt;strike&gt;, &lt;u&gt;.'),
          '#options' => array(
            0 => t('No'),
            1 => t('Yes, but leave applet, embed and isindex elements that currently can\'t be transformed'),
            2 => t('yes, removing applet, embed and isindex elements and their contents (nested elements remain)'),
          ),
          '#default_value' => variable_get('beautify_htmlawed_strict_tags', 1),
        );
        $form['options']['beautify_htmlawed_clean_msword'] = array(
          '#type' => 'radios',
          '#title' => t('Clean MS Word'),
          '#description' => t('Replace discouraged characters introduced by Microsoft Word, etc.'),
          '#options' => array(
            0 => t('No'),
            1 => t('Yes'),
            2 => t('Yes, plus replace special single & double quotes with ordinary ones'),
          ),
          '#default_value' => variable_get('beautify_htmlawed_clean_msword', 0),
        );
        $form['options']['beautify_htmlawed_comments'] = array(
          '#type' => 'radios',
          '#title' => t('Comment handling'),
          '#options' => array(
            0 => t('Don\'t consider comments as markup and proceed as if plain text'),
            1 => t('Remove'),
            2 => t('Allow, but neutralize any &lt;, &gt;, and &amp; inside by converting to named entities'),
            3 => t('Allow'),
          ),
          '#default_value' => variable_get('beautify_htmlawed_comments', 3),
        );
        break;
    }

    return system_settings_form($form);
  }
  /**
   * Calculate the available methods.
   *
   * Besides listing the processing methods that can be used on this
   * installation, the returned object carries the active method and the modes
   * that method supports. If the stored mode is not supported by the active
   * method, the first supported mode is selected and written back to the
   * 'beautify_mode' variable.
   *
   * @return
   *   An object with the properties:
   *   - methods: array of method labels keyed by method name.
   *   - default: the active method name.
   *   - options: array of mode labels keyed by mode id, for the active method.
   *   - mode: the active mode id.
   */
  function beautify_methods() {
    // Create the container explicitly; assigning properties to an undefined
    // variable relies on implicit object creation, which newer PHP versions
    // reject.
    $methods = new stdClass();

    // Always add the built-in method since this should always be available.
    $methods->methods = array('builtin' => 'Built-in');

    // If HTML Tidy is found, add that method.
    if (beautify_htmltidy_test()) {
      $methods->methods += array('htmltidy' => 'HTMLTidy');
    }

    // If htmLawed is found, add that method.
    $path = drupal_get_path('module', 'beautify');
    if (file_exists($path .'/htmLawed.php')) {
      $methods->methods += array('htmlawed' => 'htmLawed');
    }

    // Stash the current active method.
    $methods->default = variable_get('beautify_method', 'builtin');

    // Get the available options for the active method.
    $methods->options = beautify_get_options();

    // Get the active option for this method.
    $methods->mode = $mode = variable_get('beautify_mode', 2);

    // If the active mode is not one of the available options, fall back to the
    // first available one and persist that choice.
    if (!isset($methods->options[$mode])) {
      $options = array_keys($methods->options);
      $methods->mode = $options[0];
      variable_set('beautify_mode', $options[0]);
    }

    return $methods;
  }
  /**
   * Returns the modes available to the active method.
   *
   * Starts from the full set of modes and removes the one the method stored in
   * the 'beautify_method' variable (default 'builtin') cannot provide.
   *
   * @return
   *   An array of mode labels keyed by mode id (1, -1, 2).
   */
  function beautify_get_options() {
    $active_method = variable_get('beautify_method', 'builtin');

    $modes = array(
      1 => 'Beautify',
      -1 => 'Compact',
      2 => 'Flatten',
    );

    switch ($active_method) {
      case 'builtin':
        // No Beautify option available for built-in method.
        unset($modes[1]);
        break;

      case 'htmlawed':
        // No Flatten option available for htmLawed method.
        unset($modes[2]);
        break;
    }

    return $modes;
  }
  /**
   * Start the output buffering.
   *
   * Buffering only begins when processing is enabled ('beautify_enabled',
   * default 1) and drupal_set_content() exists, which this module uses as its
   * signal that the request is not being answered from the page cache.
   */
  function beautify_init() {
    $enabled = variable_get('beautify_enabled', 1);
    if ($enabled && function_exists('drupal_set_content')) {
      ob_start();
    }
  }
  /**
   * Flush the output buffer and send the contents to the router for processing.
   *
   * Nothing happens when processing is disabled, when drupal_set_content() is
   * missing (the module's signal for a cached page), or when a $destination was
   * passed in.
   *
   * @param $destination
   *   Optional value supplied by the caller; the buffer is only processed when
   *   this is empty.
   */
  function beautify_exit($destination = NULL) {
    if (!variable_get('beautify_enabled', 1)) {
      return;
    }

    // Ensure we are not serving a cached page and are not being redirected.
    if (!function_exists('drupal_set_content') || $destination != NULL) {
      return;
    }

    // Grab what has been buffered so far, then discard the buffer itself.
    $buffered = ob_get_contents();
    ob_end_clean();

    // Send the output to the router for processing.
    beautify_router($buffered);
  }
  /**
   * Route the buffered output to the active processing routine.
   *
   * The active method is used to process the output, which is then stored in
   * the page cache and printed. On subsequent requests the page should be
   * retrieved from the page cache.
   *
   * If the configured method is unknown, or the processor yields nothing, the
   * original markup is sent instead so the visitor never receives a blank page.
   *
   * @param $input
   *   The buffered output to be processed.
   */
  function beautify_router($input) {
    global $user;

    // We don't want to do anything if the user is authenticated since this has
    // been causing problems with node/add etc.
    if ($user->uid) {
      print $input;
      return;
    }

    $processor = variable_get('beautify_method', 'builtin');
    $mode = variable_get('beautify_mode', 2);

    switch ($processor) {
      case 'builtin':
        $output = beautify_process($input, $mode);
        break;

      case 'htmlawed':
        $output = beautify_htmlawed_process($input);
        break;

      case 'htmltidy':
        $output = beautify_htmltidy_process($input);
        break;

      default:
        // Unknown or stale method name: pass the page through untouched.
        $output = $input;
        break;
    }

    // A processor that failed may hand back NULL or an empty string.
    if (!is_string($output) || $output === '') {
      $output = $input;
    }

    beautify_set_cache($output);
    print $output;
  }
  /**
   * Set the processed HTML to the page cache.
   *
   * This attempts to override the page_set_cache(). There could be a better way
   * to do this. Suggestions welcome!
   *
   * The page is only stored for anonymous GET requests for which
   * page_get_cache(TRUE) returns a true value. When page compression is enabled
   * the data is gzip-encoded first, unless zlib already compresses the output.
   *
   * @param $input
   *   The processed HTML to store.
   */
  function beautify_set_cache($input) {
    global $user, $base_root;

    // This will fail in some cases, see page_get_cache() for the explanation.
    $cacheable = !$user->uid && $_SERVER['REQUEST_METHOD'] == 'GET' && page_get_cache(TRUE);
    if (!$cacheable) {
      return;
    }

    $store = TRUE;
    if (variable_get('page_compression', TRUE) && function_exists('gzencode')) {
      $coding = zlib_get_coding_type();
      if ($coding == 'deflate') {
        // We do not store the data in case the zlib mode is deflate.
        // This should be rarely happening.
        $store = FALSE;
      }
      elseif ($coding == FALSE) {
        $input = gzencode($input, 9, FORCE_GZIP);
      }
      // The remaining case is 'gzip' which means the data is
      // already compressed and nothing left to do but to store it.
    }

    if ($store && $input) {
      cache_set($base_root . request_uri(), $input, 'cache_page', CACHE_TEMPORARY, drupal_get_headers());
    }
  }
  /**
   * Process whatever we are given and return the HTML Tidy response.
   *
   * Builds the Tidy command-line arguments from the module's variables, runs
   * Tidy through beautify_htmltidy_run() and applies some mode specific clean-up
   * to the result. Errors and warnings are returned as arrays by reference.
   *
   * @param $input
   *   html string to be tidied
   * @param $errors
   *   an array to be filled with error info
   * @param $warnings
   *   an array to be filled with warning info
   * @return
   *   the tidied string, or $input unchanged when the Tidy binary is missing or
   *   Tidy could not process the document.
   */
  function beautify_htmltidy_command($input, &$errors, &$warnings) {
    // Guarantee both by-reference results are arrays on every return path.
    $errors = array();
    $warnings = array();

    $default_path = drupal_get_path('module', 'beautify') .'/bin/tidy'. (strpos(PHP_OS, 'WIN') === 0 ? '.exe' : '');
    $path = variable_get('beautify_htmltidy_path', $default_path);
    if (!file_exists($path)) {
      // watchdog() takes the placeholders as third and the severity as fourth
      // argument, and expects the untranslated message.
      watchdog('beautify', "Couldn't find the Tidy binary at '%path', not using tidy.", array('%path' => $path), WATCHDOG_WARNING);
      $errors[] = t("Couldn't find the Tidy binary at '%path', not using tidy.", array('%path' => $path));
      return $input;
    }

    /*
     * Do not pass the parameters their default values as defined in the
     * documentation for tidy (http://www.w3.org/People/Raggett/tidy/), or weird
     * stuff starts to happen.
     */
    $args = array();
    $mode = variable_get('beautify_mode', 2);

    // Output Valid XHTML
    $args[] = '--output-xhtml '. variable_get('beautify_htmltidy_valid_xhtml', 1);

    // Choose DOCTYPE method - if our user based "Theme" option is set we need to
    // do some additional processing: Tidy omits the DOCTYPE and the one found in
    // the original document is put back in front of the result.
    $doctype = '';
    $doctype_mode = variable_get('beautify_htmltidy_doctype', 'auto');
    if ($doctype_mode == 'theme') {
      $args[] = '--doctype omit';
      $document = beautify_parse_html($input);
      if (!empty($document->dtd)) {
        $doctype = $document->dtd ."\n";
      }
    }
    else {
      $args[] = '--doctype '. escapeshellarg($doctype_mode);
    }

    // If mode is set to Beautify we need to set indentation to auto. Yes is not
    // recommended in the Tidy reference.
    if ($mode == 1) {
      $args[] = '--indent auto';
    }

    // TODO: Add these options to the settings page.
    if (!variable_get('beautify_htmltidy_verbose', 0)) {
      $args[] = '-q';
    }
    if (!variable_get('beautify_htmltidy_wrapphp', 0)) {
      $args[] = '--wrap-php no';
    }
    if (variable_get('beautify_htmltidy_clean', 0)) {
      $args[] = '--clean yes';
    }
    if (variable_get('beautify_htmltidy_enclosetext', 0)) {
      $args[] = '--enclose-text yes';
    }
    if (variable_get('beautify_htmltidy_encloseblocktext', 0)) {
      $args[] = '--enclose-block-text yes';
    }

    // Clean MS Word
    $args[] = '--bare '. variable_get('beautify_htmltidy_word_bare', 0);
    $args[] = '--word-2000 '. variable_get('beautify_htmltidy_word_2000', 0);
    $args[] = '--drop-proprietary-attributes '. variable_get('beautify_htmltidy_word_attributes', 0);

    // User specified configuration file. Tidy reads it through the single-dash
    // "-config <file>" switch; "--config" would be parsed as an (unknown)
    // configuration option.
    $conf = variable_get('htmltidy_confpath', '');
    if ($conf && file_exists($conf)) {
      $args[] = '-config '. escapeshellarg($conf);
    }

    // Don't add a meta tag with the Tidy info.
    $args[] = '--tidy-mark no';
    // Don't wrap output - this looks terrible so we always set this to off.
    $args[] = '-wrap 0';
    // Output only UTF-8
    $args[] = '-utf8';
    // Modify the input file instead of outputting to stdout.
    $args[] = '-modify';

    // Run the processing with the specified arguments. Real variables are passed
    // so the messages collected by the runner reach our caller.
    $output = '';
    $status = beautify_htmltidy_run($input, $args, $output, $errors, $warnings);

    // Exit status 2 and above means Tidy did not produce a document. Return the
    // page untouched rather than prepending a second DOCTYPE to it.
    if ($status > 1 || !is_string($output) || $output === '') {
      return $input;
    }

    // TODO: Re-implement the debug output (inline warnings and the optional
    // second Tidy run for accurate line numbers).

    // Additional processing for beautified output.
    if ($mode == 1) {
      // remove newline from empty script tags
      $output = preg_replace("@(<script[^>]*>)\n(<\/script>)@", '$1$2', $output);
      // add correct indentation for comments inside script tags
      $output = preg_replace("@(<script[^>]*>)\n(<!--)@", "$1\n $2", $output);
      // add newline and correct indentation for comments immediately following closing script tag
      $output = preg_replace("@(<\/script>)(<!--)@", "$1\n $2", $output);
      // add newline and correct indentation to opening tags immediately following closing div tag
      $output = preg_replace("@([ ]*)<\/div>(<)@", "$1</div>\n$1$2", $output);
    }

    // Additional processing for flattened output.
    if ($mode == 2) {
      // remove newline from empty script tags
      $output = preg_replace("@(<script[^>]*>)\n(<\/script>)@", '$1$2', $output);
      // add newline comments immediately following closing script tag
      $output = preg_replace("@(<\/script>)(<!--)@", "$1\n$2", $output);
      // remove all leading spaces
      $output = preg_replace('@(\n[ ]*<)@', "\n<", $output);
    }

    return $doctype . $output;
  }
  /**
   * Run the HTML Tidy binary over a document.
   *
   * The input is written to a temporary file which Tidy modifies in place; its
   * messages are written to a second temporary file. Both files are removed
   * before returning.
   *
   * @param $input
   *   The HTML to process.
   * @param $args
   *   An array of command-line argument strings for Tidy.
   * @param $output
   *   Set to the processed HTML, or to $input when Tidy could not be run or
   *   reported errors.
   * @param $errors
   *   Set to an array of error lines.
   * @param $warnings
   *   Set to an array of warning lines.
   * @return
   *   The exit status of Tidy (0 success, 1 warnings, 2 errors). 2 is also
   *   returned when Tidy could not be started at all.
   */
  function beautify_htmltidy_run($input, $args, &$output, &$errors, &$warnings) {
    $errors = array();
    $warnings = array();
    // Until Tidy succeeds, the best available output is the untouched input.
    $output = $input;

    $default_path = drupal_get_path('module', 'beautify') .'/bin/tidy'. (strpos(PHP_OS, 'WIN') === 0 ? '.exe' : '');
    $tidypath = variable_get('beautify_htmltidy_path', $default_path);
    if (!file_exists($tidypath)) {
      watchdog('beautify', 'Failed to find HTML Tidy executable at %beautify_htmltidy_path, not using tidy', array('%beautify_htmltidy_path' => $tidypath), WATCHDOG_WARNING);
      return 2;
    }

    // Write input to a file because Tidy is asked to modify a file in place.
    $dirtyFilename = tempnam(file_directory_temp(), 'drup');
    // Warnings are saved to file.
    $warningsFilename = tempnam(file_directory_temp(), 'warn');

    $written = FALSE;
    if ($dirtyFilename !== FALSE && $warningsFilename !== FALSE) {
      $f = fopen($dirtyFilename, 'w');
      if ($f) {
        $written = fwrite($f, $input) !== FALSE;
        fclose($f);
      }
    }

    $return_value = 2;
    if (!$written) {
      $errors[] = t('Could not write the temporary files needed to run HTML Tidy.');
    }
    else {
      $args = (array) $args;
      $args[] = '-f '. escapeshellarg($warningsFilename);

      // Run Tidy with the right options. exec() is used instead of system() so
      // that nothing Tidy prints can leak into the page being served.
      $command = escapeshellarg($tidypath) .' '. implode(' ', $args) .' '. escapeshellarg($dirtyFilename);
      $ignored = array();
      exec($command, $ignored, $return_value);

      // Read the messages once; a missing or unreadable file means no messages.
      $lines = is_readable($warningsFilename) ? file($warningsFilename) : FALSE;
      $lines = ($lines === FALSE) ? array() : array_map('trim', $lines);

      // return_value 0 means success. 1 means warning. 2 means error, the file
      // will be there, but not have been touched.
      switch ($return_value) {
        case 0:
        case 1:
          if ($return_value == 1) {
            $warnings = $lines;
          }
          $tidied = file_get_contents($dirtyFilename);
          if ($tidied !== FALSE) {
            $output = $tidied;
          }
          break;

        case 2:
          // separate errors and warnings into two different arrays
          foreach ($lines as $line) {
            if (preg_match('|^line \d+ column \d+ - Warning:|', $line)) {
              $warnings[] = $line;
            }
            else {
              $errors[] = $line;
            }
          }
          break;

        default:
          // Anything else (e.g. the shell could not execute the binary): keep
          // the untouched input and report the status.
          $errors[] = t('HTML Tidy exited with the unexpected status @status.', array('@status' => $return_value));
          break;
      }
    }

    // delete the temporary files.
    foreach (array($dirtyFilename, $warningsFilename) as $file) {
      if ($file !== FALSE && file_exists($file)) {
        unlink($file);
      }
    }

    return $return_value;
  }
  /**
   * Calculates a valid path to the HTML Tidy binary and checks that it runs.
   *
   * The path stored in 'beautify_htmltidy_path' is used when it exists.
   * Otherwise the binary bundled in the module's bin directory is tried, then
   * (except on Windows) the result of "which tidy"; a path found this way is
   * saved back to the variable. The binary is then executed with -v.
   *
   * @param $message
   *   Assigned to an explanation.
   * @param $version
   *   Assigned to the first line printed by "tidy -v" on success.
   * @return
   *   TRUE if found,
   *   FALSE if error.
   */
  function beautify_htmltidy_test(&$message = '', &$version = '') {
    $is_windows = strpos(PHP_OS, 'WIN') === 0;
    $bundled = drupal_get_path('module', 'beautify') .'/bin/tidy'. ($is_windows ? '.exe' : '');
    $path = variable_get('beautify_htmltidy_path', $bundled);

    // The @ keeps warnings that file_exists() may raise (presumably for paths
    // PHP is not allowed to inspect) away from the page.
    if (!@file_exists($path)) {
      $new_path = NULL;

      // Test for binary in subdirectory first
      if (@file_exists($bundled)) {
        $new_path = $bundled;
      }
      elseif (!$is_windows) {
        // Not on Windows, so let's try which. Its output ends with a newline
        // that must not become part of the stored path.
        $which = trim((string) shell_exec('which tidy'));
        if (strpos($which, '/') === 0) {
          $new_path = $which;
        }
      }

      if (!isset($new_path)) {
        $message .= t('Could not find HTML Tidy binary.');
        return FALSE;
      }

      variable_set('beautify_htmltidy_path', $new_path);
      $path = $new_path;
    }

    // Whether the path was configured or just discovered, verify that the
    // binary actually runs. Previously a valid configured path skipped this
    // step and the function returned nothing, i.e. "not available".
    $command = escapeshellarg($path) .' -v';
    $response = array();
    if (exec($command, $response)) {
      $version = $response[0];
      return TRUE;
    }

    $message .= t('Found an HTML Tidy binary but it didn\'t seem to run properly. !command failed to respond correctly.',
      array('!command' => $command));
    return FALSE;
  }
  /**
   * Parse the HTML document into sections.
   *
   * Every property of the returned object is always set; a section that cannot
   * be found in the input is an empty string. When no body element is found the
   * whole (trimmed) input is treated as the body.
   *
   * @param $input
   *   The buffered HTML to parse.
   * @return $output
   *   An object containing:
   *   $html_document->dtd      The text up to and including the DTD reference.
   *   $html_document->doctype  The text up to and including the opening html
   *                            tag, followed by a newline.
   *   $html_document->head     The head element, followed by a newline.
   *   $html_document->body     The trimmed content of the body element.
   *   $html_document->bodyattr The trimmed attributes of the body tag.
   *
   * TODO: For performance reasons make this function accept a string to ask for
   * a specific part of the document. We can still send the whole object by
   * default if still needed.
   */
  function beautify_parse_html($input) {
    $output = new stdClass();
    $output->dtd = '';
    $output->doctype = '';
    $output->head = '';
    $output->body = '';
    $output->bodyattr = '';

    // DTD: just the doctype without the html tag.
    if (preg_match('/(.*\.dtd">)/Umsi', $input, $matches)) {
      $output->dtd = $matches[1];
    }

    // DOCTYPE: doctype plus opening html tag with attributes.
    if (preg_match('/(.*<html.*>)/Umsi', $input, $matches)) {
      $output->doctype = $matches[1] ."\n";
    }

    // head: the whole head section.
    if (preg_match('@(<head.*>.*</head\\s*>)@Umsi', $input, $matches)) {
      $head = trim($matches[1]);
      $replace = array("\n ", "\n\n");
      $head = str_replace($replace, "\n", $head);
      $output->head = $head ."\n";
    }

    // body: attributes of the tag and the content of the element.
    if (preg_match('@<body(.*)>(.*)</body\\s*>@Umsi', $input, $matches)) {
      $output->bodyattr = trim($matches[1]);
      $output->body = trim($matches[2]);
    }
    else {
      $output->body = trim($input);
    }

    return $output;
  }
  /**
   * Built-in processing function for compacting or flattening the output.
   *
   * @param $input
   *   The HTML output to process.
   * @param $mode
   *   The mode of operation defined on beautify_get_options(): -1 compacts the
   *   markup onto as few lines as possible, 2 removes indentation. Any other
   *   value returns the input unchanged.
   * @return
   *   The processed output.
   */
  function beautify_process($input, $mode) {
    switch ($mode) {
      case -1: // Compact
        // Split off everything up to the first ">" (the doctype of a full
        // document) so it is kept on its own, unprocessed. Only this leading
        // occurrence is cut off; removing the text with str_replace() would
        // also strip every later identical tag from the document.
        $leading = '';
        $rest = $input;
        if (preg_match('/(.*)>/Umsi', $input, $matches)) {
          $leading = $matches[0];
          $rest = (string) substr($input, strlen($leading));
        }
        $output = preg_replace("@\n\s+@", " ", $rest); // newline and 1 or more spaces
        $output = preg_replace("@\s\s\s+@", " ", $output); // three or more spaces
        $output = preg_replace("@>\n@", "> ", $output); // closing tag marker and a new line
        $output = $leading . $output;
        break;

      case 2: // Flatten
        $output = preg_replace("@\n\s+@", "\n", $input); // a newline and some spaces
        // NOTE(review): runs of whitespace are removed entirely here, which
        // joins words separated by two spaces - confirm this is intended.
        $output = preg_replace("@\s\s+@", "", $output); // two or more spaces
        break;

      default:
        $output = $input;
        break;
    }

    return $output;
  }
  /**
   * Processes the HTML document through htmLawed.
   *
   * The document is split into sections: the head is run through the built-in
   * processor (htmLawed only handles the body), the body through htmLawed, and
   * the parts are then put back together. If the htmLawed library cannot be
   * loaded the input is returned unchanged.
   *
   * @param $input
   *   The buffered output to process.
   * @return $output
   *   The processed output.
   */
  function beautify_htmlawed_process($input) {
    // Load the library from the module directory, the same place where
    // beautify_methods() looks for it.
    if (!function_exists('htmLawed')) {
      $library = dirname(__FILE__) .'/htmLawed.php';
      if (file_exists($library)) {
        include_once $library;
      }
    }
    if (!function_exists('htmLawed')) {
      return $input;
    }

    // Split the document into sections. Sections that were not found may be
    // missing from the object.
    $html_document = beautify_parse_html($input);
    $doctype = isset($html_document->doctype) ? $html_document->doctype : '';
    $head_source = isset($html_document->head) ? $html_document->head : '';
    $body_source = isset($html_document->body) ? $html_document->body : '';
    $bodyattr = isset($html_document->bodyattr) ? $html_document->bodyattr : '';

    // If compact mode is active we need to compact the head ourselves since
    // htmLawed won't do that for us; in every other mode it is flattened.
    $mode = variable_get('beautify_mode', 2);
    $head = beautify_process($head_source, ($mode == -1) ? -1 : 2) ."\n";

    // Set up the config arguments.
    $config = array(
      'balance' => variable_get('beautify_htmlawed_balance_tags', 1),
      'valid_xhtml' => variable_get('beautify_htmlawed_valid_xhtml', 1),
      'clean_ms_char' => variable_get('beautify_htmlawed_clean_msword', 0),
      'comment' => variable_get('beautify_htmlawed_comments', 3),
      'css_expression' => variable_get('beautify_htmlawed_css_expressions', 0),
      'schemes' => '*:*',
      'make_tag_strict' => variable_get('beautify_htmlawed_strict_tags', 1),
      'keep_bad' => variable_get('beautify_htmlawed_keep_bad', 2),
    );

    // Our modes are slightly different to those accepted by htmLawed.
    if ($mode == 1) {
      $config['tidy'] = 2;
    }
    elseif ($mode == -1) {
      $config['tidy'] = -1;
    }

    // Process the body of the document through htmLawed.
    $body = htmLawed($body_source, $config);

    // Concatenate the separate parts back together. The space before the
    // attributes is only added when there are attributes.
    $body_tag = '<body'. ($bodyattr !== '' ? ' '. $bodyattr : '') .'>';
    return $doctype . $head . $body_tag ."\n". trim($body) ."\n</body>\n</html>";
  }
  /**
   * Process the buffered output through the HTML Tidy processor.
   *
   * In compact mode the result of HTML Tidy is additionally run through the
   * built-in compact routine. When enabled and permitted, the messages reported
   * by Tidy are appended to the output.
   *
   * @param $input
   *   The buffered output to process.
   * @return $output
   *   The processed output, or $input when Tidy produced nothing.
   */
  function beautify_htmltidy_process($input) {
    $errors = array();
    $warnings = array();
    $output = beautify_htmltidy_command($input, $errors, $warnings);

    // Tidy may be unavailable; never continue with an empty document.
    if (!is_string($output) || $output === '') {
      $output = $input;
    }

    // Merge the errors and warnings together with the errors listed first.
    // The casts cover a command that left one of them unset.
    $errors = array_merge((array) $errors, (array) $warnings);

    // Here we're using HTML Tidy to flatten and then using the built-in compact
    // routine on what Tidy produced.
    $mode = variable_get('beautify_mode', 2);
    if ($mode == -1) {
      $output = beautify_process($output, $mode);
    }

    // TODO: Fix output of errors. They are appended after the closing html tag.
    if ($errors && variable_get('beautify_htmltidy_warnings', 0) && user_access('use beautify debug mode')) {
      // check_plain() escapes for HTML and is UTF-8 safe.
      $errors = array_map('check_plain', $errors);
      // The theme hook is the function name without its "theme_" prefix.
      $output .= theme('beautify_htmltidy_errors', $errors);
    }

    return $output;
  }
  /**
   * Theme function for displaying the errors.
   *
   * Wraps an item list of the messages in a "beautify-errors" container with a
   * heading.
   *
   * @param $errors
   *   An array of errors generated by HTML Tidy.
   * @return $output
   *   The HTML for displaying the errors on the page.
   */
  function theme_beautify_htmltidy_errors($errors) {
    $list = theme('item_list', $errors);
    return '<div class="beautify-errors"><h3>Beautify Errors</h3>'. $list .'</div>';
  }
  695. }