diff.js 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. /* See LICENSE file for terms of use */
  2. /*
  3. * Text diff implementation.
  4. *
  5. * This library supports the following APIS:
  6. * JsDiff.diffChars: Character by character diff
  7. * JsDiff.diffWords: Word (as defined by \b regex) diff which ignores whitespace
  8. * JsDiff.diffLines: Line based diff
  9. *
  10. * JsDiff.diffCss: Diff targeted at CSS content
  11. *
  12. * These methods are based on the implementation proposed in
  13. * "An O(ND) Difference Algorithm and its Variations" (Myers, 1986).
  14. * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927
  15. */
  16. (function(global, undefined) {
  17. var objectPrototypeToString = Object.prototype.toString;
  18. /*istanbul ignore next*/
  19. function map(arr, mapper, that) {
  20. if (Array.prototype.map) {
  21. return Array.prototype.map.call(arr, mapper, that);
  22. }
  23. var other = new Array(arr.length);
  24. for (var i = 0, n = arr.length; i < n; i++) {
  25. other[i] = mapper.call(that, arr[i], i, arr);
  26. }
  27. return other;
  28. }
  29. function clonePath(path) {
  30. return { newPos: path.newPos, components: path.components.slice(0) };
  31. }
  32. function removeEmpty(array) {
  33. var ret = [];
  34. for (var i = 0; i < array.length; i++) {
  35. if (array[i]) {
  36. ret.push(array[i]);
  37. }
  38. }
  39. return ret;
  40. }
  41. function escapeHTML(s) {
  42. var n = s;
  43. n = n.replace(/&/g, '&amp;');
  44. n = n.replace(/</g, '&lt;');
  45. n = n.replace(/>/g, '&gt;');
  46. n = n.replace(/"/g, '&quot;');
  47. return n;
  48. }
  49. // This function handles the presence of circular references by bailing out when encountering an
  50. // object that is already on the "stack" of items being processed.
  51. function canonicalize(obj, stack, replacementStack) {
  52. stack = stack || [];
  53. replacementStack = replacementStack || [];
  54. var i;
  55. for (i = 0; i < stack.length; i += 1) {
  56. if (stack[i] === obj) {
  57. return replacementStack[i];
  58. }
  59. }
  60. var canonicalizedObj;
  61. if ('[object Array]' === objectPrototypeToString.call(obj)) {
  62. stack.push(obj);
  63. canonicalizedObj = new Array(obj.length);
  64. replacementStack.push(canonicalizedObj);
  65. for (i = 0; i < obj.length; i += 1) {
  66. canonicalizedObj[i] = canonicalize(obj[i], stack, replacementStack);
  67. }
  68. stack.pop();
  69. replacementStack.pop();
  70. } else if (typeof obj === 'object' && obj !== null) {
  71. stack.push(obj);
  72. canonicalizedObj = {};
  73. replacementStack.push(canonicalizedObj);
  74. var sortedKeys = [],
  75. key;
  76. for (key in obj) {
  77. sortedKeys.push(key);
  78. }
  79. sortedKeys.sort();
  80. for (i = 0; i < sortedKeys.length; i += 1) {
  81. key = sortedKeys[i];
  82. canonicalizedObj[key] = canonicalize(obj[key], stack, replacementStack);
  83. }
  84. stack.pop();
  85. replacementStack.pop();
  86. } else {
  87. canonicalizedObj = obj;
  88. }
  89. return canonicalizedObj;
  90. }
  91. function buildValues(components, newString, oldString, useLongestToken) {
  92. var componentPos = 0,
  93. componentLen = components.length,
  94. newPos = 0,
  95. oldPos = 0;
  96. for (; componentPos < componentLen; componentPos++) {
  97. var component = components[componentPos];
  98. if (!component.removed) {
  99. if (!component.added && useLongestToken) {
  100. var value = newString.slice(newPos, newPos + component.count);
  101. value = map(value, function(value, i) {
  102. var oldValue = oldString[oldPos + i];
  103. return oldValue.length > value.length ? oldValue : value;
  104. });
  105. component.value = value.join('');
  106. } else {
  107. component.value = newString.slice(newPos, newPos + component.count).join('');
  108. }
  109. newPos += component.count;
  110. // Common case
  111. if (!component.added) {
  112. oldPos += component.count;
  113. }
  114. } else {
  115. component.value = oldString.slice(oldPos, oldPos + component.count).join('');
  116. oldPos += component.count;
  117. // Reverse add and remove so removes are output first to match common convention
  118. // The diffing algorithm is tied to add then remove output and this is the simplest
  119. // route to get the desired output with minimal overhead.
  120. if (componentPos && components[componentPos - 1].added) {
  121. var tmp = components[componentPos - 1];
  122. components[componentPos - 1] = components[componentPos];
  123. components[componentPos] = tmp;
  124. }
  125. }
  126. }
  127. return components;
  128. }
  129. function Diff(ignoreWhitespace) {
  130. this.ignoreWhitespace = ignoreWhitespace;
  131. }
  132. Diff.prototype = {
  133. diff: function(oldString, newString, callback) {
  134. var self = this;
  135. function done(value) {
  136. if (callback) {
  137. setTimeout(function() { callback(undefined, value); }, 0);
  138. return true;
  139. } else {
  140. return value;
  141. }
  142. }
  143. // Handle the identity case (this is due to unrolling editLength == 0
  144. if (newString === oldString) {
  145. return done([{ value: newString }]);
  146. }
  147. if (!newString) {
  148. return done([{ value: oldString, removed: true }]);
  149. }
  150. if (!oldString) {
  151. return done([{ value: newString, added: true }]);
  152. }
  153. newString = this.tokenize(newString);
  154. oldString = this.tokenize(oldString);
  155. var newLen = newString.length, oldLen = oldString.length;
  156. var editLength = 1;
  157. var maxEditLength = newLen + oldLen;
  158. var bestPath = [{ newPos: -1, components: [] }];
  159. // Seed editLength = 0, i.e. the content starts with the same values
  160. var oldPos = this.extractCommon(bestPath[0], newString, oldString, 0);
  161. if (bestPath[0].newPos + 1 >= newLen && oldPos + 1 >= oldLen) {
  162. // Identity per the equality and tokenizer
  163. return done([{value: newString.join('')}]);
  164. }
  165. // Main worker method. checks all permutations of a given edit length for acceptance.
  166. function execEditLength() {
  167. for (var diagonalPath = -1 * editLength; diagonalPath <= editLength; diagonalPath += 2) {
  168. var basePath;
  169. var addPath = bestPath[diagonalPath - 1],
  170. removePath = bestPath[diagonalPath + 1],
  171. oldPos = (removePath ? removePath.newPos : 0) - diagonalPath;
  172. if (addPath) {
  173. // No one else is going to attempt to use this value, clear it
  174. bestPath[diagonalPath - 1] = undefined;
  175. }
  176. var canAdd = addPath && addPath.newPos + 1 < newLen,
  177. canRemove = removePath && 0 <= oldPos && oldPos < oldLen;
  178. if (!canAdd && !canRemove) {
  179. // If this path is a terminal then prune
  180. bestPath[diagonalPath] = undefined;
  181. continue;
  182. }
  183. // Select the diagonal that we want to branch from. We select the prior
  184. // path whose position in the new string is the farthest from the origin
  185. // and does not pass the bounds of the diff graph
  186. if (!canAdd || (canRemove && addPath.newPos < removePath.newPos)) {
  187. basePath = clonePath(removePath);
  188. self.pushComponent(basePath.components, undefined, true);
  189. } else {
  190. basePath = addPath; // No need to clone, we've pulled it from the list
  191. basePath.newPos++;
  192. self.pushComponent(basePath.components, true, undefined);
  193. }
  194. oldPos = self.extractCommon(basePath, newString, oldString, diagonalPath);
  195. // If we have hit the end of both strings, then we are done
  196. if (basePath.newPos + 1 >= newLen && oldPos + 1 >= oldLen) {
  197. return done(buildValues(basePath.components, newString, oldString, self.useLongestToken));
  198. } else {
  199. // Otherwise track this path as a potential candidate and continue.
  200. bestPath[diagonalPath] = basePath;
  201. }
  202. }
  203. editLength++;
  204. }
  205. // Performs the length of edit iteration. Is a bit fugly as this has to support the
  206. // sync and async mode which is never fun. Loops over execEditLength until a value
  207. // is produced.
  208. if (callback) {
  209. (function exec() {
  210. setTimeout(function() {
  211. // This should not happen, but we want to be safe.
  212. /*istanbul ignore next */
  213. if (editLength > maxEditLength) {
  214. return callback();
  215. }
  216. if (!execEditLength()) {
  217. exec();
  218. }
  219. }, 0);
  220. }());
  221. } else {
  222. while (editLength <= maxEditLength) {
  223. var ret = execEditLength();
  224. if (ret) {
  225. return ret;
  226. }
  227. }
  228. }
  229. },
  230. pushComponent: function(components, added, removed) {
  231. var last = components[components.length - 1];
  232. if (last && last.added === added && last.removed === removed) {
  233. // We need to clone here as the component clone operation is just
  234. // as shallow array clone
  235. components[components.length - 1] = {count: last.count + 1, added: added, removed: removed };
  236. } else {
  237. components.push({count: 1, added: added, removed: removed });
  238. }
  239. },
  240. extractCommon: function(basePath, newString, oldString, diagonalPath) {
  241. var newLen = newString.length,
  242. oldLen = oldString.length,
  243. newPos = basePath.newPos,
  244. oldPos = newPos - diagonalPath,
  245. commonCount = 0;
  246. while (newPos + 1 < newLen && oldPos + 1 < oldLen && this.equals(newString[newPos + 1], oldString[oldPos + 1])) {
  247. newPos++;
  248. oldPos++;
  249. commonCount++;
  250. }
  251. if (commonCount) {
  252. basePath.components.push({count: commonCount});
  253. }
  254. basePath.newPos = newPos;
  255. return oldPos;
  256. },
  257. equals: function(left, right) {
  258. var reWhitespace = /\S/;
  259. return left === right || (this.ignoreWhitespace && !reWhitespace.test(left) && !reWhitespace.test(right));
  260. },
  261. tokenize: function(value) {
  262. return value.split('');
  263. }
  264. };
  265. var CharDiff = new Diff();
  266. var WordDiff = new Diff(true);
  267. var WordWithSpaceDiff = new Diff();
  268. WordDiff.tokenize = WordWithSpaceDiff.tokenize = function(value) {
  269. return removeEmpty(value.split(/(\s+|\b)/));
  270. };
  271. var CssDiff = new Diff(true);
  272. CssDiff.tokenize = function(value) {
  273. return removeEmpty(value.split(/([{}:;,]|\s+)/));
  274. };
  275. var LineDiff = new Diff();
  276. var TrimmedLineDiff = new Diff();
  277. TrimmedLineDiff.ignoreTrim = true;
  278. LineDiff.tokenize = TrimmedLineDiff.tokenize = function(value) {
  279. var retLines = [],
  280. lines = value.split(/^/m);
  281. for (var i = 0; i < lines.length; i++) {
  282. var line = lines[i],
  283. lastLine = lines[i - 1],
  284. lastLineLastChar = lastLine && lastLine[lastLine.length - 1];
  285. // Merge lines that may contain windows new lines
  286. if (line === '\n' && lastLineLastChar === '\r') {
  287. retLines[retLines.length - 1] = retLines[retLines.length - 1].slice(0, -1) + '\r\n';
  288. } else {
  289. if (this.ignoreTrim) {
  290. line = line.trim();
  291. // add a newline unless this is the last line.
  292. if (i < lines.length - 1) {
  293. line += '\n';
  294. }
  295. }
  296. retLines.push(line);
  297. }
  298. }
  299. return retLines;
  300. };
  301. var PatchDiff = new Diff();
  302. PatchDiff.tokenize = function(value) {
  303. var ret = [],
  304. linesAndNewlines = value.split(/(\n|\r\n)/);
  305. // Ignore the final empty token that occurs if the string ends with a new line
  306. if (!linesAndNewlines[linesAndNewlines.length - 1]) {
  307. linesAndNewlines.pop();
  308. }
  309. // Merge the content and line separators into single tokens
  310. for (var i = 0; i < linesAndNewlines.length; i++) {
  311. var line = linesAndNewlines[i];
  312. if (i % 2) {
  313. ret[ret.length - 1] += line;
  314. } else {
  315. ret.push(line);
  316. }
  317. }
  318. return ret;
  319. };
  320. var SentenceDiff = new Diff();
  321. SentenceDiff.tokenize = function(value) {
  322. return removeEmpty(value.split(/(\S.+?[.!?])(?=\s+|$)/));
  323. };
  324. var JsonDiff = new Diff();
  325. // Discriminate between two lines of pretty-printed, serialized JSON where one of them has a
  326. // dangling comma and the other doesn't. Turns out including the dangling comma yields the nicest output:
  327. JsonDiff.useLongestToken = true;
  328. JsonDiff.tokenize = LineDiff.tokenize;
  329. JsonDiff.equals = function(left, right) {
  330. return LineDiff.equals(left.replace(/,([\r\n])/g, '$1'), right.replace(/,([\r\n])/g, '$1'));
  331. };
  332. var JsDiff = {
  333. Diff: Diff,
  334. diffChars: function(oldStr, newStr, callback) { return CharDiff.diff(oldStr, newStr, callback); },
  335. diffWords: function(oldStr, newStr, callback) { return WordDiff.diff(oldStr, newStr, callback); },
  336. diffWordsWithSpace: function(oldStr, newStr, callback) { return WordWithSpaceDiff.diff(oldStr, newStr, callback); },
  337. diffLines: function(oldStr, newStr, callback) { return LineDiff.diff(oldStr, newStr, callback); },
  338. diffTrimmedLines: function(oldStr, newStr, callback) { return TrimmedLineDiff.diff(oldStr, newStr, callback); },
  339. diffSentences: function(oldStr, newStr, callback) { return SentenceDiff.diff(oldStr, newStr, callback); },
  340. diffCss: function(oldStr, newStr, callback) { return CssDiff.diff(oldStr, newStr, callback); },
  341. diffJson: function(oldObj, newObj, callback) {
  342. return JsonDiff.diff(
  343. typeof oldObj === 'string' ? oldObj : JSON.stringify(canonicalize(oldObj), undefined, ' '),
  344. typeof newObj === 'string' ? newObj : JSON.stringify(canonicalize(newObj), undefined, ' '),
  345. callback
  346. );
  347. },
  348. createTwoFilesPatch: function(oldFileName, newFileName, oldStr, newStr, oldHeader, newHeader) {
  349. var ret = [];
  350. if (oldFileName == newFileName) {
  351. ret.push('Index: ' + oldFileName);
  352. }
  353. ret.push('===================================================================');
  354. ret.push('--- ' + oldFileName + (typeof oldHeader === 'undefined' ? '' : '\t' + oldHeader));
  355. ret.push('+++ ' + newFileName + (typeof newHeader === 'undefined' ? '' : '\t' + newHeader));
  356. var diff = PatchDiff.diff(oldStr, newStr);
  357. diff.push({value: '', lines: []}); // Append an empty value to make cleanup easier
  358. // Formats a given set of lines for printing as context lines in a patch
  359. function contextLines(lines) {
  360. return map(lines, function(entry) { return ' ' + entry; });
  361. }
  362. // Outputs the no newline at end of file warning if needed
  363. function eofNL(curRange, i, current) {
  364. var last = diff[diff.length - 2],
  365. isLast = i === diff.length - 2,
  366. isLastOfType = i === diff.length - 3 && current.added !== last.added;
  367. // Figure out if this is the last line for the given file and missing NL
  368. if (!(/\n$/.test(current.value)) && (isLast || isLastOfType)) {
  369. curRange.push('\\ No newline at end of file');
  370. }
  371. }
  372. var oldRangeStart = 0, newRangeStart = 0, curRange = [],
  373. oldLine = 1, newLine = 1;
  374. for (var i = 0; i < diff.length; i++) {
  375. var current = diff[i],
  376. lines = current.lines || current.value.replace(/\n$/, '').split('\n');
  377. current.lines = lines;
  378. if (current.added || current.removed) {
  379. // If we have previous context, start with that
  380. if (!oldRangeStart) {
  381. var prev = diff[i - 1];
  382. oldRangeStart = oldLine;
  383. newRangeStart = newLine;
  384. if (prev) {
  385. curRange = contextLines(prev.lines.slice(-4));
  386. oldRangeStart -= curRange.length;
  387. newRangeStart -= curRange.length;
  388. }
  389. }
  390. // Output our changes
  391. curRange.push.apply(curRange, map(lines, function(entry) {
  392. return (current.added ? '+' : '-') + entry;
  393. }));
  394. eofNL(curRange, i, current);
  395. // Track the updated file position
  396. if (current.added) {
  397. newLine += lines.length;
  398. } else {
  399. oldLine += lines.length;
  400. }
  401. } else {
  402. // Identical context lines. Track line changes
  403. if (oldRangeStart) {
  404. // Close out any changes that have been output (or join overlapping)
  405. if (lines.length <= 8 && i < diff.length - 2) {
  406. // Overlapping
  407. curRange.push.apply(curRange, contextLines(lines));
  408. } else {
  409. // end the range and output
  410. var contextSize = Math.min(lines.length, 4);
  411. ret.push(
  412. '@@ -' + oldRangeStart + ',' + (oldLine - oldRangeStart + contextSize)
  413. + ' +' + newRangeStart + ',' + (newLine - newRangeStart + contextSize)
  414. + ' @@');
  415. ret.push.apply(ret, curRange);
  416. ret.push.apply(ret, contextLines(lines.slice(0, contextSize)));
  417. if (lines.length <= 4) {
  418. eofNL(ret, i, current);
  419. }
  420. oldRangeStart = 0;
  421. newRangeStart = 0;
  422. curRange = [];
  423. }
  424. }
  425. oldLine += lines.length;
  426. newLine += lines.length;
  427. }
  428. }
  429. return ret.join('\n') + '\n';
  430. },
  431. createPatch: function(fileName, oldStr, newStr, oldHeader, newHeader) {
  432. return JsDiff.createTwoFilesPatch(fileName, fileName, oldStr, newStr, oldHeader, newHeader);
  433. },
  434. applyPatch: function(oldStr, uniDiff) {
  435. var diffstr = uniDiff.split('\n'),
  436. hunks = [],
  437. i = 0,
  438. remEOFNL = false,
  439. addEOFNL = false;
  440. // Skip to the first change hunk
  441. while (i < diffstr.length && !(/^@@/.test(diffstr[i]))) {
  442. i++;
  443. }
  444. // Parse the unified diff
  445. for (; i < diffstr.length; i++) {
  446. if (diffstr[i][0] === '@') {
  447. var chnukHeader = diffstr[i].split(/@@ -(\d+),(\d+) \+(\d+),(\d+) @@/);
  448. hunks.unshift({
  449. start: chnukHeader[3],
  450. oldlength: +chnukHeader[2],
  451. removed: [],
  452. newlength: chnukHeader[4],
  453. added: []
  454. });
  455. } else if (diffstr[i][0] === '+') {
  456. hunks[0].added.push(diffstr[i].substr(1));
  457. } else if (diffstr[i][0] === '-') {
  458. hunks[0].removed.push(diffstr[i].substr(1));
  459. } else if (diffstr[i][0] === ' ') {
  460. hunks[0].added.push(diffstr[i].substr(1));
  461. hunks[0].removed.push(diffstr[i].substr(1));
  462. } else if (diffstr[i][0] === '\\') {
  463. if (diffstr[i - 1][0] === '+') {
  464. remEOFNL = true;
  465. } else if (diffstr[i - 1][0] === '-') {
  466. addEOFNL = true;
  467. }
  468. }
  469. }
  470. // Apply the diff to the input
  471. var lines = oldStr.split('\n');
  472. for (i = hunks.length - 1; i >= 0; i--) {
  473. var hunk = hunks[i];
  474. // Sanity check the input string. Bail if we don't match.
  475. for (var j = 0; j < hunk.oldlength; j++) {
  476. if (lines[hunk.start - 1 + j] !== hunk.removed[j]) {
  477. return false;
  478. }
  479. }
  480. Array.prototype.splice.apply(lines, [hunk.start - 1, hunk.oldlength].concat(hunk.added));
  481. }
  482. // Handle EOFNL insertion/removal
  483. if (remEOFNL) {
  484. while (!lines[lines.length - 1]) {
  485. lines.pop();
  486. }
  487. } else if (addEOFNL) {
  488. lines.push('');
  489. }
  490. return lines.join('\n');
  491. },
  492. convertChangesToXML: function(changes) {
  493. var ret = [];
  494. for (var i = 0; i < changes.length; i++) {
  495. var change = changes[i];
  496. if (change.added) {
  497. ret.push('<ins>');
  498. } else if (change.removed) {
  499. ret.push('<del>');
  500. }
  501. ret.push(escapeHTML(change.value));
  502. if (change.added) {
  503. ret.push('</ins>');
  504. } else if (change.removed) {
  505. ret.push('</del>');
  506. }
  507. }
  508. return ret.join('');
  509. },
  510. // See: http://code.google.com/p/google-diff-match-patch/wiki/API
  511. convertChangesToDMP: function(changes) {
  512. var ret = [],
  513. change,
  514. operation;
  515. for (var i = 0; i < changes.length; i++) {
  516. change = changes[i];
  517. if (change.added) {
  518. operation = 1;
  519. } else if (change.removed) {
  520. operation = -1;
  521. } else {
  522. operation = 0;
  523. }
  524. ret.push([operation, change.value]);
  525. }
  526. return ret;
  527. },
  528. canonicalize: canonicalize
  529. };
  530. /*istanbul ignore next */
  531. /*global module */
  532. if (typeof module !== 'undefined' && module.exports) {
  533. module.exports = JsDiff;
  534. } else if (typeof define === 'function' && define.amd) {
  535. /*global define */
  536. define([], function() { return JsDiff; });
  537. } else if (typeof global.JsDiff === 'undefined') {
  538. global.JsDiff = JsDiff;
  539. }
  540. }(this));