--[[
Based on DiffMatchPatch by Neil Fraser.
https://github.com/google/diff-match-patch
]]
export type DiffAction = number
export type Diff = { actionType: DiffAction, value: string }
export type Diffs = { Diff }
local StringDiff = {
ActionTypes = table.freeze({
Equal = 0,
Delete = 1,
Insert = 2,
}),
}
function StringDiff.findDiffs(text1: string, text2: string): Diffs
-- Validate inputs
if type(text1) ~= "string" or type(text2) ~= "string" then
error(
string.format(
"Invalid inputs to StringDiff.findDiffs, expected strings and got (%s, %s)",
type(text1),
type(text2)
),
2
)
end
-- Shortcut if the texts are identical
if text1 == text2 then
return { { actionType = StringDiff.ActionTypes.Equal, value = text1 } }
end
-- Trim off any shared prefix and suffix
-- These are easy to detect and can be dealt with quickly without needing a complex diff
-- and later we simply add them as Equal to the start and end of the diff
local sharedPrefix, sharedSuffix
local prefixLength = StringDiff._sharedPrefix(text1, text2)
if prefixLength > 0 then
-- Store the prefix
sharedPrefix = string.sub(text1, 1, prefixLength)
-- Now trim it off
text1 = string.sub(text1, prefixLength + 1)
text2 = string.sub(text2, prefixLength + 1)
end
local suffixLength = StringDiff._sharedSuffix(text1, text2)
if suffixLength > 0 then
-- Store the suffix
sharedSuffix = string.sub(text1, -suffixLength)
-- Now trim it off
text1 = string.sub(text1, 1, -suffixLength - 1)
text2 = string.sub(text2, 1, -suffixLength - 1)
end
-- Compute the diff on the middle block where the changes lie
local diffs = StringDiff._computeDiff(text1, text2)
-- Restore the prefix and suffix
if sharedPrefix then
table.insert(diffs, 1, { actionType = StringDiff.ActionTypes.Equal, value = sharedPrefix })
end
if sharedSuffix then
table.insert(diffs, { actionType = StringDiff.ActionTypes.Equal, value = sharedSuffix })
end
-- Cleanup the diff
diffs = StringDiff._reorderAndMerge(diffs)
return diffs
end
function StringDiff._sharedPrefix(text1: string, text2: string): number
-- Uses a binary search to find the largest common prefix between the two strings
-- Performance analysis: http://neil.fraser.name/news/2007/10/09/
-- Shortcut common cases
if (#text1 == 0) or (#text2 == 0) or (string.byte(text1, 1) ~= string.byte(text2, 1)) then
return 0
end
local pointerMin = 1
local pointerMax = math.min(#text1, #text2)
local pointerMid = pointerMax
local pointerStart = 1
while pointerMin < pointerMid do
if string.sub(text1, pointerStart, pointerMid) == string.sub(text2, pointerStart, pointerMid) then
pointerMin = pointerMid
pointerStart = pointerMin
else
pointerMax = pointerMid
end
pointerMid = math.floor(pointerMin + (pointerMax - pointerMin) / 2)
end
return pointerMid
end
function StringDiff._sharedSuffix(text1: string, text2: string): number
-- Uses a binary search to find the largest common suffix between the two strings
-- Performance analysis: http://neil.fraser.name/news/2007/10/09/
-- Shortcut common cases
if (#text1 == 0) or (#text2 == 0) or (string.byte(text1, -1) ~= string.byte(text2, -1)) then
return 0
end
local pointerMin = 1
local pointerMax = math.min(#text1, #text2)
local pointerMid = pointerMax
local pointerEnd = 1
while pointerMin < pointerMid do
if string.sub(text1, -pointerMid, -pointerEnd) == string.sub(text2, -pointerMid, -pointerEnd) then
pointerMin = pointerMid
pointerEnd = pointerMin
else
pointerMax = pointerMid
end
pointerMid = math.floor(pointerMin + (pointerMax - pointerMin) / 2)
end
return pointerMid
end
function StringDiff._computeDiff(text1: string, text2: string): Diffs
-- Assumes that the prefix and suffix have already been trimmed off
-- and shortcut returns have been made so these texts must be different
local text1Length, text2Length = #text1, #text2
if text1Length == 0 then
-- It's simply inserting all of text2 into text1
return { { actionType = StringDiff.ActionTypes.Insert, value = text2 } }
end
if text2Length == 0 then
-- It's simply deleting all of text1
return { { actionType = StringDiff.ActionTypes.Delete, value = text1 } }
end
local longText = if text1Length > text2Length then text1 else text2
local shortText = if text1Length > text2Length then text2 else text1
local shortTextLength = #shortText
-- Shortcut if the shorter string exists entirely inside the longer one
local indexOf = if shortTextLength == 0 then nil else string.find(longText, shortText, 1, true)
if indexOf ~= nil then
local diffs = {
{ actionType = StringDiff.ActionTypes.Insert, value = string.sub(longText, 1, indexOf - 1) },
{ actionType = StringDiff.ActionTypes.Equal, value = shortText },
{ actionType = StringDiff.ActionTypes.Insert, value = string.sub(longText, indexOf + shortTextLength) },
}
-- Swap insertions for deletions if diff is reversed
if text1Length > text2Length then
diffs[1].actionType, diffs[3].actionType = StringDiff.ActionTypes.Delete, StringDiff.ActionTypes.Delete
end
return diffs
end
if shortTextLength == 1 then
-- Single character string
-- After the previous shortcut, the character can't be an equality
return {
{ actionType = StringDiff.ActionTypes.Delete, value = text1 },
{ actionType = StringDiff.ActionTypes.Insert, value = text2 },
}
end
return StringDiff._bisect(text1, text2)
end
function StringDiff._bisect(text1: string, text2: string): Diffs
-- Find the 'middle snake' of a diff, split the problem in two
-- and return the recursively constructed diff
-- See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations
-- Cache the text lengths to prevent multiple calls
local text1Length = #text1
local text2Length = #text2
local _sub, _element
local maxD = math.ceil((text1Length + text2Length) / 2)
local vOffset = maxD
local vLength = 2 * maxD
local v1 = table.create(vLength)
local v2 = table.create(vLength)
-- Setting all elements to -1 is faster in Lua than mixing integers and nil
for x = 0, vLength - 1 do
v1[x] = -1
v2[x] = -1
end
v1[vOffset + 1] = 0
v2[vOffset + 1] = 0
local delta = text1Length - text2Length
-- If the total number of characters is odd, then
-- the front path will collide with the reverse path
local front = (delta % 2 ~= 0)
-- Offsets for start and end of k loop
-- Prevents mapping of space beyond the grid
local k1Start = 0
local k1End = 0
local k2Start = 0
local k2End = 0
for d = 0, maxD - 1 do
-- Walk the front path one step
for k1 = -d + k1Start, d - k1End, 2 do
local k1_offset = vOffset + k1
local x1
if (k1 == -d) or ((k1 ~= d) and (v1[k1_offset - 1] < v1[k1_offset + 1])) then
x1 = v1[k1_offset + 1]
else
x1 = v1[k1_offset - 1] + 1
end
local y1 = x1 - k1
while
(x1 <= text1Length)
and (y1 <= text2Length)
and (string.sub(text1, x1, x1) == string.sub(text2, y1, y1))
do
x1 = x1 + 1
y1 = y1 + 1
end
v1[k1_offset] = x1
if x1 > text1Length + 1 then
-- Ran off the right of the graph
k1End = k1End + 2
elseif y1 > text2Length + 1 then
-- Ran off the bottom of the graph
k1Start = k1Start + 2
elseif front then
local k2_offset = vOffset + delta - k1
if k2_offset >= 0 and k2_offset < vLength and v2[k2_offset] ~= -1 then
-- Mirror x2 onto top-left coordinate system
local x2 = text1Length - v2[k2_offset] + 1
if x1 > x2 then
-- Overlap detected
return StringDiff._bisectSplit(text1, text2, x1, y1)
end
end
end
end
-- Walk the reverse path one step
for k2 = -d + k2Start, d - k2End, 2 do
local k2_offset = vOffset + k2
local x2
if (k2 == -d) or ((k2 ~= d) and (v2[k2_offset - 1] < v2[k2_offset + 1])) then
x2 = v2[k2_offset + 1]
else
x2 = v2[k2_offset - 1] + 1
end
local y2 = x2 - k2
while
(x2 <= text1Length)
and (y2 <= text2Length)
and (string.sub(text1, -x2, -x2) == string.sub(text2, -y2, -y2))
do
x2 = x2 + 1
y2 = y2 + 1
end
v2[k2_offset] = x2
if x2 > text1Length + 1 then
-- Ran off the left of the graph
k2End = k2End + 2
elseif y2 > text2Length + 1 then
-- Ran off the top of the graph
k2Start = k2Start + 2
elseif not front then
local k1_offset = vOffset + delta - k2
if k1_offset >= 0 and k1_offset < vLength and v1[k1_offset] ~= -1 then
local x1 = v1[k1_offset]
local y1 = vOffset + x1 - k1_offset
-- Mirror x2 onto top-left coordinate system
x2 = text1Length - x2 + 1
if x1 > x2 then
-- Overlap detected
return StringDiff._bisectSplit(text1, text2, x1, y1)
end
end
end
end
end
-- Number of diffs equals number of characters, no commonality at all
return {
{ actionType = StringDiff.ActionTypes.Delete, value = text1 },
{ actionType = StringDiff.ActionTypes.Insert, value = text2 },
}
end
function StringDiff._bisectSplit(text1: string, text2: string, x: number, y: number): Diffs
-- Given the location of the 'middle snake',
-- split the diff in two parts and recurse
local text1a = string.sub(text1, 1, x - 1)
local text2a = string.sub(text2, 1, y - 1)
local text1b = string.sub(text1, x)
local text2b = string.sub(text2, y)
-- Compute both diffs serially
local diffs = StringDiff.findDiffs(text1a, text2a)
local diffsB = StringDiff.findDiffs(text1b, text2b)
-- Merge diffs
table.move(diffsB, 1, #diffsB, #diffs + 1, diffs)
return diffs
end
function StringDiff._reorderAndMerge(diffs: Diffs): Diffs
-- Reorder and merge like edit sections and merge equalities
-- Any edit section can move as long as it doesn't cross an equality
-- Add a dummy entry at the end
table.insert(diffs, { actionType = StringDiff.ActionTypes.Equal, value = "" })
local pointer = 1
local countDelete, countInsert = 0, 0
local textDelete, textInsert = "", ""
local commonLength
while diffs[pointer] do
local actionType = diffs[pointer].actionType
if actionType == StringDiff.ActionTypes.Insert then
countInsert = countInsert + 1
textInsert = textInsert .. diffs[pointer].value
pointer = pointer + 1
elseif actionType == StringDiff.ActionTypes.Delete then
countDelete = countDelete + 1
textDelete = textDelete .. diffs[pointer].value
pointer = pointer + 1
elseif actionType == StringDiff.ActionTypes.Equal then
-- Upon reaching an equality, check for prior redundancies
if countDelete + countInsert > 1 then
if (countDelete > 0) and (countInsert > 0) then
-- Factor out any common prefixies
commonLength = StringDiff._sharedPrefix(textInsert, textDelete)
if commonLength > 0 then
local back_pointer = pointer - countDelete - countInsert
if
(back_pointer > 1) and (diffs[back_pointer - 1].actionType == StringDiff.ActionTypes.Equal)
then
diffs[back_pointer - 1].value = diffs[back_pointer - 1].value
.. string.sub(textInsert, 1, commonLength)
else
table.insert(diffs, 1, {
actionType = StringDiff.ActionTypes.Equal,
value = string.sub(textInsert, 1, commonLength),
})
pointer = pointer + 1
end
textInsert = string.sub(textInsert, commonLength + 1)
textDelete = string.sub(textDelete, commonLength + 1)
end
-- Factor out any common suffixies
commonLength = StringDiff._sharedSuffix(textInsert, textDelete)
if commonLength ~= 0 then
diffs[pointer].value = string.sub(textInsert, -commonLength) .. diffs[pointer].value
textInsert = string.sub(textInsert, 1, -commonLength - 1)
textDelete = string.sub(textDelete, 1, -commonLength - 1)
end
end
-- Delete the offending records and add the merged ones
pointer = pointer - countDelete - countInsert
for _ = 1, countDelete + countInsert do
table.remove(diffs, pointer)
end
if #textDelete > 0 then
table.insert(diffs, pointer, { actionType = StringDiff.ActionTypes.Delete, value = textDelete })
pointer = pointer + 1
end
if #textInsert > 0 then
table.insert(diffs, pointer, { actionType = StringDiff.ActionTypes.Insert, value = textInsert })
pointer = pointer + 1
end
pointer = pointer + 1
elseif (pointer > 1) and (diffs[pointer - 1].actionType == StringDiff.ActionTypes.Equal) then
-- Merge this equality with the previous one
diffs[pointer - 1].value = diffs[pointer - 1].value .. diffs[pointer].value
table.remove(diffs, pointer)
else
pointer = pointer + 1
end
countInsert, countDelete = 0, 0
textDelete, textInsert = "", ""
end
end
if diffs[#diffs].value == "" then
-- Remove the dummy entry at the end
diffs[#diffs] = nil
end
-- Second pass: look for single edits surrounded on both sides by equalities
-- which can be shifted sideways to eliminate an equality
-- e.g: ABAC -> ABAC
local changes = false
pointer = 2
-- Intentionally ignore the first and last element (don't need checking)
while pointer < #diffs do
local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
if
(prevDiff.actionType == StringDiff.ActionTypes.Equal)
and (nextDiff.actionType == StringDiff.ActionTypes.Equal)
then
-- This is a single edit surrounded by equalities
local currentDiff = diffs[pointer]
local currentText = currentDiff.value
local prevText = prevDiff.value
local nextText = nextDiff.value
if #prevText == 0 then
table.remove(diffs, pointer - 1)
changes = true
elseif string.sub(currentText, -#prevText) == prevText then
-- Shift the edit over the previous equality
currentDiff.value = prevText .. string.sub(currentText, 1, -#prevText - 1)
nextDiff.value = prevText .. nextDiff.value
table.remove(diffs, pointer - 1)
changes = true
elseif string.sub(currentText, 1, #nextText) == nextText then
-- Shift the edit over the next equality
prevDiff.value = prevText .. nextText
currentDiff.value = string.sub(currentText, #nextText + 1) .. nextText
table.remove(diffs, pointer + 1)
changes = true
end
end
pointer = pointer + 1
end
-- If shifts were made, the diffs need reordering and another shift sweep
if changes then
return StringDiff._reorderAndMerge(diffs)
end
return diffs
end
return StringDiff