Improved string diff viewer (#994)

This commit is contained in:
boatbomber
2025-11-18 20:26:44 -08:00
committed by GitHub
parent 31ec216a95
commit 071b6e7e23
8 changed files with 705 additions and 221 deletions

View File

@@ -1,3 +1,4 @@
--!strict
--[[
Based on DiffMatchPatch by Neil Fraser.
https://github.com/google/diff-match-patch
@@ -67,8 +68,187 @@ function StringDiff.findDiffs(text1: string, text2: string): Diffs
end
-- Cleanup the diff
diffs = StringDiff._cleanupSemantic(diffs)
diffs = StringDiff._reorderAndMerge(diffs)
-- Remove any empty diffs
local cursor = 1
while cursor and diffs[cursor] do
if diffs[cursor].value == "" then
table.remove(diffs, cursor)
else
cursor += 1
end
end
return diffs
end
function StringDiff._computeDiff(text1: string, text2: string): Diffs
-- Assumes that the prefix and suffix have already been trimmed off
-- and shortcut returns have been made so these texts must be different
local text1Length, text2Length = #text1, #text2
if text1Length == 0 then
-- It's simply inserting all of text2 into text1
return { { actionType = StringDiff.ActionTypes.Insert, value = text2 } }
end
if text2Length == 0 then
-- It's simply deleting all of text1
return { { actionType = StringDiff.ActionTypes.Delete, value = text1 } }
end
local longText = if text1Length > text2Length then text1 else text2
local shortText = if text1Length > text2Length then text2 else text1
local shortTextLength = #shortText
-- Shortcut if the shorter string exists entirely inside the longer one
local indexOf = if shortTextLength == 0 then nil else string.find(longText, shortText, 1, true)
if indexOf ~= nil then
local diffs = {
{ actionType = StringDiff.ActionTypes.Insert, value = string.sub(longText, 1, indexOf - 1) },
{ actionType = StringDiff.ActionTypes.Equal, value = shortText },
{ actionType = StringDiff.ActionTypes.Insert, value = string.sub(longText, indexOf + shortTextLength) },
}
-- Swap insertions for deletions if diff is reversed
if text1Length > text2Length then
diffs[1].actionType, diffs[3].actionType = StringDiff.ActionTypes.Delete, StringDiff.ActionTypes.Delete
end
return diffs
end
if shortTextLength == 1 then
-- Single character string
-- After the previous shortcut, the character can't be an equality
return {
{ actionType = StringDiff.ActionTypes.Delete, value = text1 },
{ actionType = StringDiff.ActionTypes.Insert, value = text2 },
}
end
return StringDiff._bisect(text1, text2)
end
function StringDiff._cleanupSemantic(diffs: Diffs): Diffs
-- Reduce the number of edits by eliminating semantically trivial equalities.
local changes = false
local equalities = {} -- Stack of indices where equalities are found.
local equalitiesLength = 0 -- Keeping our own length var is faster.
local lastEquality: string? = nil
-- Always equal to diffs[equalities[equalitiesLength]].value
local pointer = 1 -- Index of current position.
-- Number of characters that changed prior to the equality.
local length_insertions1 = 0
local length_deletions1 = 0
-- Number of characters that changed after the equality.
local length_insertions2 = 0
local length_deletions2 = 0
while diffs[pointer] do
if diffs[pointer].actionType == StringDiff.ActionTypes.Equal then -- Equality found.
equalitiesLength = equalitiesLength + 1
equalities[equalitiesLength] = pointer
length_insertions1 = length_insertions2
length_deletions1 = length_deletions2
length_insertions2 = 0
length_deletions2 = 0
lastEquality = diffs[pointer].value
else -- An insertion or deletion.
if diffs[pointer].actionType == StringDiff.ActionTypes.Insert then
length_insertions2 = length_insertions2 + #diffs[pointer].value
else
length_deletions2 = length_deletions2 + #diffs[pointer].value
end
-- Eliminate an equality that is smaller or equal to the edits on both
-- sides of it.
if
lastEquality
and (#lastEquality <= math.max(length_insertions1, length_deletions1))
and (#lastEquality <= math.max(length_insertions2, length_deletions2))
then
-- Duplicate record.
table.insert(
diffs,
equalities[equalitiesLength],
{ actionType = StringDiff.ActionTypes.Delete, value = lastEquality }
)
-- Change second copy to insert.
diffs[equalities[equalitiesLength] + 1].actionType = StringDiff.ActionTypes.Insert
-- Throw away the equality we just deleted.
equalitiesLength = equalitiesLength - 1
-- Throw away the previous equality (it needs to be reevaluated).
equalitiesLength = equalitiesLength - 1
pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
length_insertions1, length_deletions1 = 0, 0 -- Reset the counters.
length_insertions2, length_deletions2 = 0, 0
lastEquality = nil
changes = true
end
end
pointer = pointer + 1
end
-- Normalize the diff.
if changes then
StringDiff._reorderAndMerge(diffs)
end
StringDiff._cleanupSemanticLossless(diffs)
-- Find any overlaps between deletions and insertions.
-- e.g: <del>abcxxx</del><ins>xxxdef</ins>
-- -> <del>abc</del>xxx<ins>def</ins>
-- e.g: <del>xxxabc</del><ins>defxxx</ins>
-- -> <ins>def</ins>xxx<del>abc</del>
-- Only extract an overlap if it is as big as the edit ahead or behind it.
pointer = 2
while diffs[pointer] do
if
diffs[pointer - 1].actionType == StringDiff.ActionTypes.Delete
and diffs[pointer].actionType == StringDiff.ActionTypes.Insert
then
local deletion = diffs[pointer - 1].value
local insertion = diffs[pointer].value
local overlap_length1 = StringDiff._commonOverlap(deletion, insertion)
local overlap_length2 = StringDiff._commonOverlap(insertion, deletion)
if overlap_length1 >= overlap_length2 then
if overlap_length1 >= #deletion / 2 or overlap_length1 >= #insertion / 2 then
-- Overlap found. Insert an equality and trim the surrounding edits.
table.insert(
diffs,
pointer,
{ actionType = StringDiff.ActionTypes.Equal, value = string.sub(insertion, 1, overlap_length1) }
)
diffs[pointer - 1].value = string.sub(deletion, 1, #deletion - overlap_length1)
diffs[pointer + 1].value = string.sub(insertion, overlap_length1 + 1)
pointer = pointer + 1
end
else
if overlap_length2 >= #deletion / 2 or overlap_length2 >= #insertion / 2 then
-- Reverse overlap found.
-- Insert an equality and swap and trim the surrounding edits.
table.insert(
diffs,
pointer,
{ actionType = StringDiff.ActionTypes.Equal, value = string.sub(deletion, 1, overlap_length2) }
)
diffs[pointer - 1] = {
actionType = StringDiff.ActionTypes.Insert,
value = string.sub(insertion, 1, #insertion - overlap_length2),
}
diffs[pointer + 1] = {
actionType = StringDiff.ActionTypes.Delete,
value = string.sub(deletion, overlap_length2 + 1),
}
pointer = pointer + 1
end
end
pointer = pointer + 1
end
pointer = pointer + 1
end
return diffs
end
@@ -124,51 +304,164 @@ function StringDiff._sharedSuffix(text1: string, text2: string): number
return pointerMid
end
function StringDiff._computeDiff(text1: string, text2: string): Diffs
-- Assumes that the prefix and suffix have already been trimmed off
-- and shortcut returns have been made so these texts must be different
function StringDiff._commonOverlap(text1: string, text2: string): number
-- Determine if the suffix of one string is the prefix of another.
local text1Length, text2Length = #text1, #text2
if text1Length == 0 then
-- It's simply inserting all of text2 into text1
return { { actionType = StringDiff.ActionTypes.Insert, value = text2 } }
-- Cache the text lengths to prevent multiple calls.
local text1_length = #text1
local text2_length = #text2
-- Eliminate the null case.
if text1_length == 0 or text2_length == 0 then
return 0
end
-- Truncate the longer string.
if text1_length > text2_length then
text1 = string.sub(text1, text1_length - text2_length + 1)
elseif text1_length < text2_length then
text2 = string.sub(text2, 1, text1_length)
end
local text_length = math.min(text1_length, text2_length)
-- Quick check for the worst case.
if text1 == text2 then
return text_length
end
if text2Length == 0 then
-- It's simply deleting all of text1
return { { actionType = StringDiff.ActionTypes.Delete, value = text1 } }
end
local longText = if text1Length > text2Length then text1 else text2
local shortText = if text1Length > text2Length then text2 else text1
local shortTextLength = #shortText
-- Shortcut if the shorter string exists entirely inside the longer one
local indexOf = if shortTextLength == 0 then nil else string.find(longText, shortText, 1, true)
if indexOf ~= nil then
local diffs = {
{ actionType = StringDiff.ActionTypes.Insert, value = string.sub(longText, 1, indexOf - 1) },
{ actionType = StringDiff.ActionTypes.Equal, value = shortText },
{ actionType = StringDiff.ActionTypes.Insert, value = string.sub(longText, indexOf + shortTextLength) },
}
-- Swap insertions for deletions if diff is reversed
if text1Length > text2Length then
diffs[1].actionType, diffs[3].actionType = StringDiff.ActionTypes.Delete, StringDiff.ActionTypes.Delete
-- Start by looking for a single character match
-- and increase length until no match is found.
-- Performance analysis: https://neil.fraser.name/news/2010/11/04/
local best = 0
local length = 1
while true do
local pattern = string.sub(text1, text_length - length + 1)
local found = string.find(text2, pattern, 1, true)
if found == nil then
return best
end
return diffs
length = length + found - 1
if found == 1 or string.sub(text1, text_length - length + 1) == string.sub(text2, 1, length) then
best = length
length = length + 1
end
end
end
function StringDiff._cleanupSemanticScore(one: string, two: string): number
-- Given two strings, compute a score representing whether the internal
-- boundary falls on logical boundaries.
-- Scores range from 6 (best) to 0 (worst).
if (#one == 0) or (#two == 0) then
-- Edges are the best.
return 6
end
if shortTextLength == 1 then
-- Single character string
-- After the previous shortcut, the character can't be an equality
return {
{ actionType = StringDiff.ActionTypes.Delete, value = text1 },
{ actionType = StringDiff.ActionTypes.Insert, value = text2 },
}
end
-- Each port of this function behaves slightly differently due to
-- subtle differences in each language's definition of things like
-- 'whitespace'. Since this function's purpose is largely cosmetic,
-- the choice has been made to use each language's native features
-- rather than force total conformity.
local char1 = string.sub(one, -1)
local char2 = string.sub(two, 1, 1)
local nonAlphaNumeric1 = string.match(char1, "%W")
local nonAlphaNumeric2 = string.match(char2, "%W")
local whitespace1 = nonAlphaNumeric1 and string.match(char1, "%s")
local whitespace2 = nonAlphaNumeric2 and string.match(char2, "%s")
local lineBreak1 = whitespace1 and string.match(char1, "%c")
local lineBreak2 = whitespace2 and string.match(char2, "%c")
local blankLine1 = lineBreak1 and string.match(one, "\n\r?\n$")
local blankLine2 = lineBreak2 and string.match(two, "^\r?\n\r?\n")
return StringDiff._bisect(text1, text2)
if blankLine1 or blankLine2 then
-- Five points for blank lines.
return 5
elseif lineBreak1 or lineBreak2 then
-- Four points for line breaks
-- DEVIATION: Prefer to start on a line break instead of end on it
return if lineBreak1 then 4 else 4.5
elseif nonAlphaNumeric1 and not whitespace1 and whitespace2 then
-- Three points for end of sentences.
return 3
elseif whitespace1 or whitespace2 then
-- Two points for whitespace.
return 2
elseif nonAlphaNumeric1 or nonAlphaNumeric2 then
-- One point for non-alphanumeric.
return 1
end
return 0
end
function StringDiff._cleanupSemanticLossless(diffs: Diffs)
-- Look for single edits surrounded on both sides by equalities
-- which can be shifted sideways to align the edit to a word boundary.
-- e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
local pointer = 2
-- Intentionally ignore the first and last element (don't need checking).
while diffs[pointer + 1] do
local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
if
(prevDiff.actionType == StringDiff.ActionTypes.Equal)
and (nextDiff.actionType == StringDiff.ActionTypes.Equal)
then
-- This is a single edit surrounded by equalities.
local diff = diffs[pointer]
local equality1 = prevDiff.value
local edit = diff.value
local equality2 = nextDiff.value
-- First, shift the edit as far left as possible.
local commonOffset = StringDiff._sharedSuffix(equality1, edit)
if commonOffset > 0 then
local commonString = string.sub(edit, -commonOffset)
equality1 = string.sub(equality1, 1, -commonOffset - 1)
edit = commonString .. string.sub(edit, 1, -commonOffset - 1)
equality2 = commonString .. equality2
end
-- Second, step character by character right, looking for the best fit.
local bestEquality1 = equality1
local bestEdit = edit
local bestEquality2 = equality2
local bestScore = StringDiff._cleanupSemanticScore(equality1, edit)
+ StringDiff._cleanupSemanticScore(edit, equality2)
while string.byte(edit, 1) == string.byte(equality2, 1) do
equality1 = equality1 .. string.sub(edit, 1, 1)
edit = string.sub(edit, 2) .. string.sub(equality2, 1, 1)
equality2 = string.sub(equality2, 2)
local score = StringDiff._cleanupSemanticScore(equality1, edit)
+ StringDiff._cleanupSemanticScore(edit, equality2)
-- The > (rather than >=) encourages leading rather than trailing whitespace on edits.
-- I just think it looks better for indentation changes to start the line,
-- since then indenting several lines all have aligned diffs at the start
if score > bestScore then
bestScore = score
bestEquality1 = equality1
bestEdit = edit
bestEquality2 = equality2
end
end
if prevDiff.value ~= bestEquality1 then
-- We have an improvement, save it back to the diff.
if #bestEquality1 > 0 then
diffs[pointer - 1].value = bestEquality1
else
table.remove(diffs, pointer - 1)
pointer = pointer - 1
end
diffs[pointer].value = bestEdit
if #bestEquality2 > 0 then
diffs[pointer + 1].value = bestEquality2
else
table.remove(diffs, pointer + 1)
pointer = pointer - 1
end
end
end
pointer = pointer + 1
end
end
function StringDiff._bisect(text1: string, text2: string): Diffs