Minify.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. <?php
  2. /**
  3. * Abstract minifier class
  4. *
  5. * Please report bugs on https://github.com/matthiasmullie/minify/issues
  6. *
  7. * @author Matthias Mullie <minify@mullie.eu>
  8. * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
  9. * @license MIT License
  10. */
  11. namespace MatthiasMullie\Minify;
  12. use MatthiasMullie\Minify\Exceptions\IOException;
  13. use Psr\Cache\CacheItemInterface;
  14. /**
  15. * Abstract minifier class.
  16. *
  17. * Please report bugs on https://github.com/matthiasmullie/minify/issues
  18. *
  19. * @package Minify
  20. * @author Matthias Mullie <minify@mullie.eu>
  21. * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
  22. * @license MIT License
  23. */
  24. abstract class Minify
  25. {
  26. /**
  27. * The data to be minified.
  28. *
  29. * @var string[]
  30. */
  31. protected $data = array();
  32. /**
  33. * Array of patterns to match.
  34. *
  35. * @var string[]
  36. */
  37. protected $patterns = array();
  38. /**
  39. * This array will hold content of strings and regular expressions that have
  40. * been extracted from the JS source code, so we can reliably match "code",
  41. * without having to worry about potential "code-like" characters inside.
  42. *
  43. * @var string[]
  44. */
  45. public $extracted = array();
  46. /**
  47. * Init the minify class - optionally, code may be passed along already.
  48. */
  49. public function __construct(/* $data = null, ... */)
  50. {
  51. // it's possible to add the source through the constructor as well ;)
  52. if (func_num_args()) {
  53. call_user_func_array(array($this, 'add'), func_get_args());
  54. }
  55. }
  56. /**
  57. * Add a file or straight-up code to be minified.
  58. *
  59. * @param string|string[] $data
  60. *
  61. * @return static
  62. */
  63. public function add($data /* $data = null, ... */)
  64. {
  65. // bogus "usage" of parameter $data: scrutinizer warns this variable is
  66. // not used (we're using func_get_args instead to support overloading),
  67. // but it still needs to be defined because it makes no sense to have
  68. // this function without argument :)
  69. $args = array($data) + func_get_args();
  70. // this method can be overloaded
  71. foreach ($args as $data) {
  72. if (is_array($data)) {
  73. call_user_func_array(array($this, 'add'), $data);
  74. continue;
  75. }
  76. // redefine var
  77. $data = (string) $data;
  78. // load data
  79. $value = $this->load($data);
  80. $key = ($data != $value) ? $data : count($this->data);
  81. // replace CR linefeeds etc.
  82. // @see https://github.com/matthiasmullie/minify/pull/139
  83. $value = str_replace(array("\r\n", "\r"), "\n", $value);
  84. // store data
  85. $this->data[$key] = $value;
  86. }
  87. return $this;
  88. }
  89. /**
  90. * Add a file to be minified.
  91. *
  92. * @param string|string[] $data
  93. *
  94. * @return static
  95. *
  96. * @throws IOException
  97. */
  98. public function addFile($data /* $data = null, ... */)
  99. {
  100. // bogus "usage" of parameter $data: scrutinizer warns this variable is
  101. // not used (we're using func_get_args instead to support overloading),
  102. // but it still needs to be defined because it makes no sense to have
  103. // this function without argument :)
  104. $args = array($data) + func_get_args();
  105. // this method can be overloaded
  106. foreach ($args as $path) {
  107. if (is_array($path)) {
  108. call_user_func_array(array($this, 'addFile'), $path);
  109. continue;
  110. }
  111. // redefine var
  112. $path = (string) $path;
  113. // check if we can read the file
  114. if (!$this->canImportFile($path)) {
  115. throw new IOException('The file "'.$path.'" could not be opened for reading. Check if PHP has enough permissions.');
  116. }
  117. $this->add($path);
  118. }
  119. return $this;
  120. }
  121. /**
  122. * Minify the data & (optionally) saves it to a file.
  123. *
  124. * @param string[optional] $path Path to write the data to
  125. *
  126. * @return string The minified data
  127. */
  128. public function minify($path = null)
  129. {
  130. $content = $this->execute($path);
  131. // save to path
  132. if ($path !== null) {
  133. $this->save($content, $path);
  134. }
  135. return $content;
  136. }
  137. /**
  138. * Minify & gzip the data & (optionally) saves it to a file.
  139. *
  140. * @param string[optional] $path Path to write the data to
  141. * @param int[optional] $level Compression level, from 0 to 9
  142. *
  143. * @return string The minified & gzipped data
  144. */
  145. public function gzip($path = null, $level = 9)
  146. {
  147. $content = $this->execute($path);
  148. $content = gzencode($content, $level, FORCE_GZIP);
  149. // save to path
  150. if ($path !== null) {
  151. $this->save($content, $path);
  152. }
  153. return $content;
  154. }
  155. /**
  156. * Minify the data & write it to a CacheItemInterface object.
  157. *
  158. * @param CacheItemInterface $item Cache item to write the data to
  159. *
  160. * @return CacheItemInterface Cache item with the minifier data
  161. */
  162. public function cache(CacheItemInterface $item)
  163. {
  164. $content = $this->execute();
  165. $item->set($content);
  166. return $item;
  167. }
  168. /**
  169. * Minify the data.
  170. *
  171. * @param string[optional] $path Path to write the data to
  172. *
  173. * @return string The minified data
  174. */
  175. abstract public function execute($path = null);
  176. /**
  177. * Load data.
  178. *
  179. * @param string $data Either a path to a file or the content itself
  180. *
  181. * @return string
  182. */
  183. protected function load($data)
  184. {
  185. // check if the data is a file
  186. if ($this->canImportFile($data)) {
  187. $data = file_get_contents($data);
  188. // strip BOM, if any
  189. if (substr($data, 0, 3) == "\xef\xbb\xbf") {
  190. $data = substr($data, 3);
  191. }
  192. }
  193. return $data;
  194. }
  195. /**
  196. * Save to file.
  197. *
  198. * @param string $content The minified data
  199. * @param string $path The path to save the minified data to
  200. *
  201. * @throws IOException
  202. */
  203. protected function save($content, $path)
  204. {
  205. $handler = $this->openFileForWriting($path);
  206. $this->writeToFile($handler, $content);
  207. @fclose($handler);
  208. }
  209. /**
  210. * Register a pattern to execute against the source content.
  211. *
  212. * @param string $pattern PCRE pattern
  213. * @param string|callable $replacement Replacement value for matched pattern
  214. */
  215. protected function registerPattern($pattern, $replacement = '')
  216. {
  217. // study the pattern, we'll execute it more than once
  218. $pattern .= 'S';
  219. $this->patterns[] = array($pattern, $replacement);
  220. }
  221. /**
  222. * We can't "just" run some regular expressions against JavaScript: it's a
  223. * complex language. E.g. having an occurrence of // xyz would be a comment,
  224. * unless it's used within a string. Of you could have something that looks
  225. * like a 'string', but inside a comment.
  226. * The only way to accurately replace these pieces is to traverse the JS one
  227. * character at a time and try to find whatever starts first.
  228. *
  229. * @param string $content The content to replace patterns in
  230. *
  231. * @return string The (manipulated) content
  232. */
  233. protected function replace($content)
  234. {
  235. $processed = '';
  236. $positions = array_fill(0, count($this->patterns), -1);
  237. $matches = array();
  238. while ($content) {
  239. // find first match for all patterns
  240. foreach ($this->patterns as $i => $pattern) {
  241. list($pattern, $replacement) = $pattern;
  242. // we can safely ignore patterns for positions we've unset earlier,
  243. // because we know these won't show up anymore
  244. if (array_key_exists($i, $positions) == false) {
  245. continue;
  246. }
  247. // no need to re-run matches that are still in the part of the
  248. // content that hasn't been processed
  249. if ($positions[$i] >= 0) {
  250. continue;
  251. }
  252. $match = null;
  253. if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE)) {
  254. $matches[$i] = $match;
  255. // we'll store the match position as well; that way, we
  256. // don't have to redo all preg_matches after changing only
  257. // the first (we'll still know where those others are)
  258. $positions[$i] = $match[0][1];
  259. } else {
  260. // if the pattern couldn't be matched, there's no point in
  261. // executing it again in later runs on this same content;
  262. // ignore this one until we reach end of content
  263. unset($matches[$i], $positions[$i]);
  264. }
  265. }
  266. // no more matches to find: everything's been processed, break out
  267. if (!$matches) {
  268. $processed .= $content;
  269. break;
  270. }
  271. // see which of the patterns actually found the first thing (we'll
  272. // only want to execute that one, since we're unsure if what the
  273. // other found was not inside what the first found)
  274. $discardLength = min($positions);
  275. $firstPattern = array_search($discardLength, $positions);
  276. $match = $matches[$firstPattern][0][0];
  277. // execute the pattern that matches earliest in the content string
  278. list($pattern, $replacement) = $this->patterns[$firstPattern];
  279. $replacement = $this->replacePattern($pattern, $replacement, $content);
  280. // figure out which part of the string was unmatched; that's the
  281. // part we'll execute the patterns on again next
  282. $content = (string) substr($content, $discardLength);
  283. $unmatched = (string) substr($content, strpos($content, $match) + strlen($match));
  284. // move the replaced part to $processed and prepare $content to
  285. // again match batch of patterns against
  286. $processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
  287. $content = $unmatched;
  288. // first match has been replaced & that content is to be left alone,
  289. // the next matches will start after this replacement, so we should
  290. // fix their offsets
  291. foreach ($positions as $i => $position) {
  292. $positions[$i] -= $discardLength + strlen($match);
  293. }
  294. }
  295. return $processed;
  296. }
  297. /**
  298. * This is where a pattern is matched against $content and the matches
  299. * are replaced by their respective value.
  300. * This function will be called plenty of times, where $content will always
  301. * move up 1 character.
  302. *
  303. * @param string $pattern Pattern to match
  304. * @param string|callable $replacement Replacement value
  305. * @param string $content Content to match pattern against
  306. *
  307. * @return string
  308. */
  309. protected function replacePattern($pattern, $replacement, $content)
  310. {
  311. if (is_callable($replacement)) {
  312. return preg_replace_callback($pattern, $replacement, $content, 1, $count);
  313. } else {
  314. return preg_replace($pattern, $replacement, $content, 1, $count);
  315. }
  316. }
  317. /**
  318. * Strings are a pattern we need to match, in order to ignore potential
  319. * code-like content inside them, but we just want all of the string
  320. * content to remain untouched.
  321. *
  322. * This method will replace all string content with simple STRING#
  323. * placeholder text, so we've rid all strings from characters that may be
  324. * misinterpreted. Original string content will be saved in $this->extracted
  325. * and after doing all other minifying, we can restore the original content
  326. * via restoreStrings().
  327. *
  328. * @param string[optional] $chars
  329. * @param string[optional] $placeholderPrefix
  330. */
  331. protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
  332. {
  333. // PHP only supports $this inside anonymous functions since 5.4
  334. $minifier = $this;
  335. $callback = function ($match) use ($minifier, $placeholderPrefix) {
  336. // check the second index here, because the first always contains a quote
  337. if ($match[2] === '') {
  338. /*
  339. * Empty strings need no placeholder; they can't be confused for
  340. * anything else anyway.
  341. * But we still needed to match them, for the extraction routine
  342. * to skip over this particular string.
  343. */
  344. return $match[0];
  345. }
  346. $count = count($minifier->extracted);
  347. $placeholder = $match[1].$placeholderPrefix.$count.$match[1];
  348. $minifier->extracted[$placeholder] = $match[1].$match[2].$match[1];
  349. return $placeholder;
  350. };
  351. /*
  352. * The \\ messiness explained:
  353. * * Don't count ' or " as end-of-string if it's escaped (has backslash
  354. * in front of it)
  355. * * Unless... that backslash itself is escaped (another leading slash),
  356. * in which case it's no longer escaping the ' or "
  357. * * So there can be either no backslash, or an even number
  358. * * multiply all of that times 4, to account for the escaping that has
  359. * to be done to pass the backslash into the PHP string without it being
  360. * considered as escape-char (times 2) and to get it in the regex,
  361. * escaped (times 2)
  362. */
  363. $this->registerPattern('/(['.$chars.'])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $callback);
  364. }
  365. /**
  366. * This method will restore all extracted data (strings, regexes) that were
  367. * replaced with placeholder text in extract*(). The original content was
  368. * saved in $this->extracted.
  369. *
  370. * @param string $content
  371. *
  372. * @return string
  373. */
  374. protected function restoreExtractedData($content)
  375. {
  376. if (!$this->extracted) {
  377. // nothing was extracted, nothing to restore
  378. return $content;
  379. }
  380. $content = strtr($content, $this->extracted);
  381. $this->extracted = array();
  382. return $content;
  383. }
  384. /**
  385. * Check if the path is a regular file and can be read.
  386. *
  387. * @param string $path
  388. *
  389. * @return bool
  390. */
  391. protected function canImportFile($path)
  392. {
  393. $parsed = parse_url($path);
  394. if (
  395. // file is elsewhere
  396. isset($parsed['host']) ||
  397. // file responds to queries (may change, or need to bypass cache)
  398. isset($parsed['query'])
  399. ) {
  400. return false;
  401. }
  402. return strlen($path) < PHP_MAXPATHLEN && @is_file($path) && is_readable($path);
  403. }
  404. /**
  405. * Attempts to open file specified by $path for writing.
  406. *
  407. * @param string $path The path to the file
  408. *
  409. * @return resource Specifier for the target file
  410. *
  411. * @throws IOException
  412. */
  413. protected function openFileForWriting($path)
  414. {
  415. if ($path === '' || ($handler = @fopen($path, 'w')) === false) {
  416. throw new IOException('The file "'.$path.'" could not be opened for writing. Check if PHP has enough permissions.');
  417. }
  418. return $handler;
  419. }
  420. /**
  421. * Attempts to write $content to the file specified by $handler. $path is used for printing exceptions.
  422. *
  423. * @param resource $handler The resource to write to
  424. * @param string $content The content to write
  425. * @param string $path The path to the file (for exception printing only)
  426. *
  427. * @throws IOException
  428. */
  429. protected function writeToFile($handler, $content, $path = '')
  430. {
  431. if (
  432. !is_resource($handler) ||
  433. ($result = @fwrite($handler, $content)) === false ||
  434. ($result < strlen($content))
  435. ) {
  436. throw new IOException('The file "'.$path.'" could not be written to. Check your disk space and file permissions.');
  437. }
  438. }
  439. }