rhodecode-enterprise-ce Commit - r4954:f53817a4

1

2

3

4

1

"""Diff Match and Patch

5

2

6

3

7

4

http://code.google.com/p/google-diff-match-patch/

8

5

9

6

Licensed under the Apache License, Version 2.0 (the "License");

10

7

you may not use this file except in compliance with the License.

11

8

You may obtain a copy of the License at

12

9

13

10

http://www.apache.org/licenses/LICENSE-2.0

14

11

15

12

Unless required by applicable law or agreed to in writing, software

16

13

distributed under the License is distributed on an "AS IS" BASIS,

17

14

WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

18

15

See the License for the specific language governing permissions and

19

16

limitations under the License.

20

17

"""

21

18

22

19

"""Functions for diff, match and patch.

23

20

24

21

Computes the difference between two texts to create a patch.

25

22

Applies the patch onto another text, allowing for errors.

26

23

"""

27

24

28

__author__ = 'fraser@google.com (Neil Fraser)'

25

__author__ = "fraser@google.com (Neil Fraser)"

29

26

30

27

import math

31

28

import re

32

29

import sys

33

30

import time

34

31

import urllib.request, urllib.parse, urllib.error

35

32

36

33

37

34

class diff_match_patch:

38

"""Class containing the diff, match and patch methods.

39

40

Also contains the behaviour settings.

41

"""

42

43

def __init__(self):

44

"""Inits a diff_match_patch object with default settings.

45

Redefine these in your program to override the defaults.

46

"""

47

48

# Number of seconds to map a diff before giving up (0 for infinity).

49

self.Diff_Timeout = 1.0

50

# Cost of an empty edit operation in terms of edit characters.

51

self.Diff_EditCost = 4

52

# At what point is no match declared (0.0 = perfection, 1.0 = very loose).

53

self.Match_Threshold = 0.5

54

# How far to search for a match (0 = exact location, 1000+ = broad match).

55

# A match this many characters away from the expected location will add

56

# 1.0 to the score (0.0 is a perfect match).

57

self.Match_Distance = 1000

58

# When deleting a large block of text (over ~64 characters), how close do

59

# the contents have to be to match the expected contents. (0.0 = perfection,

60

# 1.0 = very loose). Note that Match_Threshold controls how closely the

61

# end points of a delete need to match.

62

self.Patch_DeleteThreshold = 0.5

63

# Chunk size for context length.

64

self.Patch_Margin = 4

65

66

# The number of bits in an int.

67

# Python has no maximum, thus to disable patch splitting set to 0.

68

# However to avoid long patches in certain pathological cases, use 32.

69

# Multiple short patches (using native ints) are much faster than long ones.

70

self.Match_MaxBits = 32

71

72

# DIFF FUNCTIONS

73

74

# The data structure representing a diff is an array of tuples:

75

# [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]

76

# which means: delete "Hello", add "Goodbye" and keep " world."

77

DIFF_DELETE = -1

78

DIFF_INSERT = 1

79

DIFF_EQUAL = 0

80

81

def diff_main(self, text1, text2, checklines=True, deadline=None):

82

"""Find the differences between two texts. Simplifies the problem by

83

stripping any common prefix or suffix off the texts before diffing.

84

85

Args:

86

text1: Old string to be diffed.

87

text2: New string to be diffed.

88

checklines: Optional speedup flag. If present and false, then don't run

89

a line-level diff first to identify the changed areas.

90

Defaults to true, which does a faster, slightly less optimal diff.

91

deadline: Optional time when the diff should be complete by. Used

92

internally for recursive calls. Users should set DiffTimeout instead.

93

94

Returns:

95

Array of changes.

96

"""

97

# Set a deadline by which time the diff must be complete.

98

if deadline is None:

99

# Unlike in most languages, Python counts time in seconds.

100

if self.Diff_Timeout <= 0:

101

deadline = sys.maxint

102

else:

103

deadline = time.time() + self.Diff_Timeout

104

105

# Check for null inputs.

106

if text1 is None or text2 is None:

107

raise ValueError("Null inputs. (diff_main)")

108

109

# Check for equality (speedup).

110

if text1 == text2:

111

if text1:

112

return [(self.DIFF_EQUAL, text1)]

113

return []

114

115

# Trim off common prefix (speedup).

116

commonlength = self.diff_commonPrefix(text1, text2)

117

commonprefix = text1[:commonlength]

118

text1 = text1[commonlength:]

119

text2 = text2[commonlength:]

120

121

# Trim off common suffix (speedup).

122

commonlength = self.diff_commonSuffix(text1, text2)

123

if commonlength == 0:

124

commonsuffix = ''

125

else:

126

commonsuffix = text1[-commonlength:]

127

text1 = text1[:-commonlength]

128

text2 = text2[:-commonlength]

129

130

# Compute the diff on the middle block.

131

diffs = self.diff_compute(text1, text2, checklines, deadline)

132

133

# Restore the prefix and suffix.

134

if commonprefix:

135

diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]

136

if commonsuffix:

137

diffs.append((self.DIFF_EQUAL, commonsuffix))

138

self.diff_cleanupMerge(diffs)

139

return diffs

140

141

def diff_compute(self, text1, text2, checklines, deadline):

142

"""Find the differences between two texts. Assumes that the texts do not

143

have any common prefix or suffix.

144

145

Args:

146

text1: Old string to be diffed.

147

text2: New string to be diffed.

148

checklines: Speedup flag. If false, then don't run a line-level diff

149

first to identify the changed areas.

150

If true, then run a faster, slightly less optimal diff.

151

deadline: Time when the diff should be complete by.

152

153

Returns:

154

Array of changes.

155

"""

156

if not text1:

157

# Just add some text (speedup).

158

return [(self.DIFF_INSERT, text2)]

159

160

if not text2:

161

# Just delete some text (speedup).

162

return [(self.DIFF_DELETE, text1)]

163

164

if len(text1) > len(text2):

165

(longtext, shorttext) = (text1, text2)

166

else:

167

(shorttext, longtext) = (text1, text2)

168

i = longtext.find(shorttext)

169

if i != -1:

170

# Shorter text is inside the longer text (speedup).

171

diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),

172

(self.DIFF_INSERT, longtext[i + len(shorttext):])]

173

# Swap insertions for deletions if diff is reversed.

174

if len(text1) > len(text2):

175

diffs[0] = (self.DIFF_DELETE, diffs[0][1])

176

diffs[2] = (self.DIFF_DELETE, diffs[2][1])

177

return diffs

178

179

if len(shorttext) == 1:

180

# Single character string.

181

# After the previous speedup, the character can't be an equality.

182

return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]

183

184

# Check to see if the problem can be split in two.

185

hm = self.diff_halfMatch(text1, text2)

186

if hm:

187

# A half-match was found, sort out the return data.

188

(text1_a, text1_b, text2_a, text2_b, mid_common) = hm

189

# Send both pairs off for separate processing.

190

diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)

191

diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)

192

# Merge the results.

193

return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b

194

195

if checklines and len(text1) > 100 and len(text2) > 100:

196

return self.diff_lineMode(text1, text2, deadline)

197

198

return self.diff_bisect(text1, text2, deadline)

199

200

def diff_lineMode(self, text1, text2, deadline):

201

"""Do a quick line-level diff on both strings, then rediff the parts for

202

greater accuracy.

203

This speedup can produce non-minimal diffs.

204

205

Args:

206

text1: Old string to be diffed.

207

text2: New string to be diffed.

208

deadline: Time when the diff should be complete by.

209

210

Returns:

211

Array of changes.

212

"""

213

214

# Scan the text on a line-by-line basis first.

215

(text1, text2, linearray) = self.diff_linesToChars(text1, text2)

216

217

diffs = self.diff_main(text1, text2, False, deadline)

218

219

# Convert the diff back to original text.

220

self.diff_charsToLines(diffs, linearray)

221

# Eliminate freak matches (e.g. blank lines)

222

self.diff_cleanupSemantic(diffs)

223

224

# Rediff any replacement blocks, this time character-by-character.

225

# Add a dummy entry at the end.

226

diffs.append((self.DIFF_EQUAL, ''))

227

pointer = 0

228

count_delete = 0

229

count_insert = 0

230

text_delete = ''

231

text_insert = ''

232

while pointer < len(diffs):

233

if diffs[pointer][0] == self.DIFF_INSERT:

234

count_insert += 1

235

text_insert += diffs[pointer][1]

236

elif diffs[pointer][0] == self.DIFF_DELETE:

237

count_delete += 1

238

text_delete += diffs[pointer][1]

239

elif diffs[pointer][0] == self.DIFF_EQUAL:

240

# Upon reaching an equality, check for prior redundancies.

241

if count_delete >= 1 and count_insert >= 1:

242

# Delete the offending records and add the merged ones.

243

a = self.diff_main(text_delete, text_insert, False, deadline)

244

diffs[pointer - count_delete - count_insert : pointer] = a

245

pointer = pointer - count_delete - count_insert + len(a)

246

count_insert = 0

247

count_delete = 0

248

text_delete = ''

249

text_insert = ''

250

251

pointer += 1

252

253

diffs.pop() # Remove the dummy entry at the end.

254

255

return diffs

256

257

def diff_bisect(self, text1, text2, deadline):

258

"""Find the 'middle snake' of a diff, split the problem in two

259

and return the recursively constructed diff.

260

See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.

261

262

Args:

263

text1: Old string to be diffed.

264

text2: New string to be diffed.

265

deadline: Time at which to bail if not yet complete.

266

267

Returns:

268

Array of diff tuples.

269

"""

270

271

# Cache the text lengths to prevent multiple calls.

272

text1_length = len(text1)

273

text2_length = len(text2)

274

max_d = (text1_length + text2_length + 1) // 2

275

v_offset = max_d

276

v_length = 2 * max_d

277

v1 = [-1] * v_length

278

v1[v_offset + 1] = 0

279

v2 = v1[:]

280

delta = text1_length - text2_length

281

# If the total number of characters is odd, then the front path will

282

# collide with the reverse path.

283

front = (delta % 2 != 0)

284

# Offsets for start and end of k loop.

285

# Prevents mapping of space beyond the grid.

286

k1start = 0

287

k1end = 0

288

k2start = 0

289

k2end = 0

290

for d in range(max_d):

291

# Bail out if deadline is reached.

292

if time.time() > deadline:

293

break

294

295

# Walk the front path one step.

296

for k1 in range(-d + k1start, d + 1 - k1end, 2):

297

k1_offset = v_offset + k1

298

if k1 == -d or (k1 != d and

299

v1[k1_offset - 1] < v1[k1_offset + 1]):

300

x1 = v1[k1_offset + 1]

301

else:

302

x1 = v1[k1_offset - 1] + 1

303

y1 = x1 - k1

304

while (x1 < text1_length and y1 < text2_length and

305

text1[x1] == text2[y1]):

306

x1 += 1

307

y1 += 1

308

v1[k1_offset] = x1

309

if x1 > text1_length:

310

# Ran off the right of the graph.

311

k1end += 2

312

elif y1 > text2_length:

313

# Ran off the bottom of the graph.

314

k1start += 2

315

elif front:

316

k2_offset = v_offset + delta - k1

317

if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:

318

# Mirror x2 onto top-left coordinate system.

319

x2 = text1_length - v2[k2_offset]

320

if x1 >= x2:

321

# Overlap detected.

322

return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

323

324

# Walk the reverse path one step.

325

for k2 in range(-d + k2start, d + 1 - k2end, 2):

326

k2_offset = v_offset + k2

327

if k2 == -d or (k2 != d and

328

v2[k2_offset - 1] < v2[k2_offset + 1]):

329

x2 = v2[k2_offset + 1]

330

else:

331

x2 = v2[k2_offset - 1] + 1

332

y2 = x2 - k2

333

while (x2 < text1_length and y2 < text2_length and

334

text1[-x2 - 1] == text2[-y2 - 1]):

335

x2 += 1

336

y2 += 1

337

v2[k2_offset] = x2

338

if x2 > text1_length:

339

# Ran off the left of the graph.

340

k2end += 2

341

elif y2 > text2_length:

342

# Ran off the top of the graph.

343

k2start += 2

344

elif not front:

345

k1_offset = v_offset + delta - k2

346

if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:

347

x1 = v1[k1_offset]

348

y1 = v_offset + x1 - k1_offset

349

# Mirror x2 onto top-left coordinate system.

350

x2 = text1_length - x2

351

if x1 >= x2:

352

# Overlap detected.

353

return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

354

355

# Diff took too long and hit the deadline or

356

# number of diffs equals number of characters, no commonality at all.

357

return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]

358

359

def diff_bisectSplit(self, text1, text2, x, y, deadline):

360

"""Given the location of the 'middle snake', split the diff in two parts

361

and recurse.

362

363

Args:

364

text1: Old string to be diffed.

365

text2: New string to be diffed.

366

x: Index of split point in text1.

367

y: Index of split point in text2.

368

deadline: Time at which to bail if not yet complete.

369

370

Returns:

371

Array of diff tuples.

372

"""

373

text1a = text1[:x]

374

text2a = text2[:y]

375

text1b = text1[x:]

376

text2b = text2[y:]

377

378

# Compute both diffs serially.

379

diffs = self.diff_main(text1a, text2a, False, deadline)

380

diffsb = self.diff_main(text1b, text2b, False, deadline)

381

382

return diffs + diffsb

383

384

def diff_linesToChars(self, text1, text2):

385

"""Split two texts into an array of strings. Reduce the texts to a string

386

of hashes where each Unicode character represents one line.

35

"""Class containing the diff, match and patch methods.

387

36

388

Args:

389

text1: First string.

390

text2: Second string.

391

392

Returns:

393

Three element tuple, containing the encoded text1, the encoded text2 and

394

the array of unique strings. The zeroth element of the array of unique

395

strings is intentionally blank.

396

"""

397

lineArray = [] # e.g. lineArray[4] == "Hello\n"

398

lineHash = {} # e.g. lineHash["Hello\n"] == 4

399

400

# "\x00" is a valid character, but various debuggers don't like it.

401

# So we'll insert a junk entry to avoid generating a null character.

402

lineArray.append('')

403

404

def diff_linesToCharsMunge(text):

405

"""Split a text into an array of strings. Reduce the texts to a string

406

of hashes where each Unicode character represents one line.

407

Modifies linearray and linehash through being a closure.

408

409

Args:

410

text: String to encode.

411

412

Returns:

413

Encoded string.

414

"""

415

chars = []

416

# Walk the text, pulling out a substring for each line.

417

# text.split('\n') would would temporarily double our memory footprint.

418

# Modifying text would create many large strings to garbage collect.

419

lineStart = 0

420

lineEnd = -1

421

while lineEnd < len(text) - 1:

422

lineEnd = text.find('\n', lineStart)

423

if lineEnd == -1:

424

lineEnd = len(text) - 1

425

line = text[lineStart:lineEnd + 1]

426

lineStart = lineEnd + 1

427

428

if line in lineHash:

429

chars.append(unichr(lineHash[line]))

430

else:

431

lineArray.append(line)

432

lineHash[line] = len(lineArray) - 1

433

chars.append(unichr(len(lineArray) - 1))

434

return "".join(chars)

435

436

chars1 = diff_linesToCharsMunge(text1)

437

chars2 = diff_linesToCharsMunge(text2)

438

return (chars1, chars2, lineArray)

439

440

def diff_charsToLines(self, diffs, lineArray):

441

"""Rehydrate the text in a diff from a string of line hashes to real lines

442

of text.

443

444

Args:

445

diffs: Array of diff tuples.

446

lineArray: Array of unique strings.

447

"""

448

for x in range(len(diffs)):

449

text = []

450

for char in diffs[x][1]:

451

text.append(lineArray[ord(char)])

452

diffs[x] = (diffs[x][0], "".join(text))

453

454

def diff_commonPrefix(self, text1, text2):

455

"""Determine the common prefix of two strings.

456

457

Args:

458

text1: First string.

459

text2: Second string.

460

461

Returns:

462

The number of characters common to the start of each string.

463

"""

464

# Quick check for common null cases.

465

if not text1 or not text2 or text1[0] != text2[0]:

466

return 0

467

# Binary search.

468

# Performance analysis: http://neil.fraser.name/news/2007/10/09/

469

pointermin = 0

470

pointermax = min(len(text1), len(text2))

471

pointermid = pointermax

472

pointerstart = 0

473

while pointermin < pointermid:

474

if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:

475

pointermin = pointermid

476

pointerstart = pointermin

477

else:

478

pointermax = pointermid

479

pointermid = (pointermax - pointermin) // 2 + pointermin

480

return pointermid

481

482

def diff_commonSuffix(self, text1, text2):

483

"""Determine the common suffix of two strings.

484

485

Args:

486

text1: First string.

487

text2: Second string.

488

489

Returns:

490

The number of characters common to the end of each string.

491

"""

492

# Quick check for common null cases.

493

if not text1 or not text2 or text1[-1] != text2[-1]:

494

return 0

495

# Binary search.

496

# Performance analysis: http://neil.fraser.name/news/2007/10/09/

497

pointermin = 0

498

pointermax = min(len(text1), len(text2))

499

pointermid = pointermax

500

pointerend = 0

501

while pointermin < pointermid:

502

if (text1[-pointermid:len(text1) - pointerend] ==

503

text2[-pointermid:len(text2) - pointerend]):

504

pointermin = pointermid

505

pointerend = pointermin

506

else:

507

pointermax = pointermid

508

pointermid = (pointermax - pointermin) // 2 + pointermin

509

return pointermid

510

511

def diff_commonOverlap(self, text1, text2):

512

"""Determine if the suffix of one string is the prefix of another.

513

514

Args:

515

text1 First string.

516

text2 Second string.

517

518

Returns:

519

The number of characters common to the end of the first

520

string and the start of the second string.

521

"""

522

# Cache the text lengths to prevent multiple calls.

523

text1_length = len(text1)

524

text2_length = len(text2)

525

# Eliminate the null case.

526

if text1_length == 0 or text2_length == 0:

527

return 0

528

# Truncate the longer string.

529

if text1_length > text2_length:

530

text1 = text1[-text2_length:]

531

elif text1_length < text2_length:

532

text2 = text2[:text1_length]

533

text_length = min(text1_length, text2_length)

534

# Quick check for the worst case.

535

if text1 == text2:

536

return text_length

537

538

# Start by looking for a single character match

539

# and increase length until no match is found.

540

# Performance analysis: http://neil.fraser.name/news/2010/11/04/

541

best = 0

542

length = 1

543

while True:

544

pattern = text1[-length:]

545

found = text2.find(pattern)

546

if found == -1:

547

return best

548

length += found

549

if found == 0 or text1[-length:] == text2[:length]:

550

best = length

551

length += 1

552

553

def diff_halfMatch(self, text1, text2):

554

"""Do the two texts share a substring which is at least half the length of

555

the longer text?

556

This speedup can produce non-minimal diffs.

557

558

Args:

559

text1: First string.

560

text2: Second string.

561

562

Returns:

563

Five element Array, containing the prefix of text1, the suffix of text1,

564

the prefix of text2, the suffix of text2 and the common middle. Or None

565

if there was no match.

566

"""

567

if self.Diff_Timeout <= 0:

568

# Don't risk returning a non-optimal diff if we have unlimited time.

569

return None

570

if len(text1) > len(text2):

571

(longtext, shorttext) = (text1, text2)

572

else:

573

(shorttext, longtext) = (text1, text2)

574

if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):

575

return None # Pointless.

576

577

def diff_halfMatchI(longtext, shorttext, i):

578

"""Does a substring of shorttext exist within longtext such that the

579

substring is at least half the length of longtext?

580

Closure, but does not reference any external variables.

581

582

Args:

583

longtext: Longer string.

584

shorttext: Shorter string.

585

i: Start index of quarter length substring within longtext.

586

587

Returns:

588

Five element Array, containing the prefix of longtext, the suffix of

589

longtext, the prefix of shorttext, the suffix of shorttext and the

590

common middle. Or None if there was no match.

591

"""

592

seed = longtext[i:i + len(longtext) // 4]

593

best_common = ''

594

j = shorttext.find(seed)

595

while j != -1:

596

prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])

597

suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])

598

if len(best_common) < suffixLength + prefixLength:

599

best_common = (shorttext[j - suffixLength:j] +

600

shorttext[j:j + prefixLength])

601

best_longtext_a = longtext[:i - suffixLength]

602

best_longtext_b = longtext[i + prefixLength:]

603

best_shorttext_a = shorttext[:j - suffixLength]

604

best_shorttext_b = shorttext[j + prefixLength:]

605

j = shorttext.find(seed, j + 1)

606

607

if len(best_common) * 2 >= len(longtext):

608

return (best_longtext_a, best_longtext_b,

609

best_shorttext_a, best_shorttext_b, best_common)

610

else:

611

return None

612

613

# First check if the second quarter is the seed for a half-match.

614

hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)

615

# Check again based on the third quarter.

616

hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)

617

if not hm1 and not hm2:

618

return None

619

elif not hm2:

620

hm = hm1

621

elif not hm1:

622

hm = hm2

623

else:

624

# Both matched. Select the longest.

625

if len(hm1[4]) > len(hm2[4]):

626

hm = hm1

627

else:

628

hm = hm2

629

630

# A half-match was found, sort out the return data.

631

if len(text1) > len(text2):

632

(text1_a, text1_b, text2_a, text2_b, mid_common) = hm

633

else:

634

(text2_a, text2_b, text1_a, text1_b, mid_common) = hm

635

return (text1_a, text1_b, text2_a, text2_b, mid_common)

636

637

def diff_cleanupSemantic(self, diffs):

638

"""Reduce the number of edits by eliminating semantically trivial

639

equalities.

640

641

Args:

642

diffs: Array of diff tuples.

643

"""

644

changes = False

645

equalities = [] # Stack of indices where equalities are found.

646

lastequality = None # Always equal to diffs[equalities[-1]][1]

647

pointer = 0 # Index of current position.

648

# Number of chars that changed prior to the equality.

649

length_insertions1, length_deletions1 = 0, 0

650

# Number of chars that changed after the equality.

651

length_insertions2, length_deletions2 = 0, 0

652

while pointer < len(diffs):

653

if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.

654

equalities.append(pointer)

655

length_insertions1, length_insertions2 = length_insertions2, 0

656

length_deletions1, length_deletions2 = length_deletions2, 0

657

lastequality = diffs[pointer][1]

658

else: # An insertion or deletion.

659

if diffs[pointer][0] == self.DIFF_INSERT:

660

length_insertions2 += len(diffs[pointer][1])

661

else:

662

length_deletions2 += len(diffs[pointer][1])

663

# Eliminate an equality that is smaller or equal to the edits on both

664

# sides of it.

665

if (lastequality and (len(lastequality) <=

666

max(length_insertions1, length_deletions1)) and

667

(len(lastequality) <= max(length_insertions2, length_deletions2))):

668

# Duplicate record.

669

diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))

670

# Change second copy to insert.

671

diffs[equalities[-1] + 1] = (self.DIFF_INSERT,

672

diffs[equalities[-1] + 1][1])

673

# Throw away the equality we just deleted.

674

equalities.pop()

675

# Throw away the previous equality (it needs to be reevaluated).

676

if len(equalities):

677

equalities.pop()

678

if len(equalities):

679

pointer = equalities[-1]

680

else:

681

pointer = -1

682

# Reset the counters.

683

length_insertions1, length_deletions1 = 0, 0

684

length_insertions2, length_deletions2 = 0, 0

685

lastequality = None

686

changes = True

687

pointer += 1

688

689

# Normalize the diff.

690

if changes:

691

self.diff_cleanupMerge(diffs)

692

self.diff_cleanupSemanticLossless(diffs)

693

694

# Find any overlaps between deletions and insertions.

695

# e.g: <del>abcxxx</del><ins>xxxdef</ins>

696

# -> <del>abc</del>xxx<ins>def</ins>

697

# e.g: <del>xxxabc</del><ins>defxxx</ins>

698

# -> <ins>def</ins>xxx<del>abc</del>

699

# Only extract an overlap if it is as big as the edit ahead or behind it.

700

pointer = 1

701

while pointer < len(diffs):

702

if (diffs[pointer - 1][0] == self.DIFF_DELETE and

703

diffs[pointer][0] == self.DIFF_INSERT):

704

deletion = diffs[pointer - 1][1]

705

insertion = diffs[pointer][1]

706

overlap_length1 = self.diff_commonOverlap(deletion, insertion)

707

overlap_length2 = self.diff_commonOverlap(insertion, deletion)

708

if overlap_length1 >= overlap_length2:

709

if (overlap_length1 >= len(deletion) / 2.0 or

710

overlap_length1 >= len(insertion) / 2.0):

711

# Overlap found. Insert an equality and trim the surrounding edits.

712

diffs.insert(pointer, (self.DIFF_EQUAL,

713

insertion[:overlap_length1]))

714

diffs[pointer - 1] = (self.DIFF_DELETE,

715

deletion[:len(deletion) - overlap_length1])

716

diffs[pointer + 1] = (self.DIFF_INSERT,

717

insertion[overlap_length1:])

718

pointer += 1

719

else:

720

if (overlap_length2 >= len(deletion) / 2.0 or

721

overlap_length2 >= len(insertion) / 2.0):

722

# Reverse overlap found.

723

# Insert an equality and swap and trim the surrounding edits.

724

diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))

725

diffs[pointer - 1] = (self.DIFF_INSERT,

726

insertion[:len(insertion) - overlap_length2])

727

diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])

728

pointer += 1

729

pointer += 1

730

pointer += 1

731

732

def diff_cleanupSemanticLossless(self, diffs):

733

"""Look for single edits surrounded on both sides by equalities

734

which can be shifted sideways to align the edit to a word boundary.

735

e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.

736

737

Args:

738

diffs: Array of diff tuples.

37

Also contains the behaviour settings.

739

38

"""

740

39

741

def diff_cleanupSemanticScore(one, two):

742

"""Given two strings, compute a score representing whether the

743

internal boundary falls on logical boundaries.

744

Scores range from 6 (best) to 0 (worst).

745

Closure, but does not reference any external variables.

40

def __init__(self):

41

"""Inits a diff_match_patch object with default settings.

42

Redefine these in your program to override the defaults.

43

"""

44

45

# Number of seconds to map a diff before giving up (0 for infinity).

46

self.Diff_Timeout = 1.0

47

# Cost of an empty edit operation in terms of edit characters.

48

self.Diff_EditCost = 4

49

# At what point is no match declared (0.0 = perfection, 1.0 = very loose).

50

self.Match_Threshold = 0.5

51

# How far to search for a match (0 = exact location, 1000+ = broad match).

52

# A match this many characters away from the expected location will add

53

# 1.0 to the score (0.0 is a perfect match).

54

self.Match_Distance = 1000

55

# When deleting a large block of text (over ~64 characters), how close do

56

# the contents have to be to match the expected contents. (0.0 = perfection,

57

# 1.0 = very loose). Note that Match_Threshold controls how closely the

58

# end points of a delete need to match.

59

self.Patch_DeleteThreshold = 0.5

60

# Chunk size for context length.

61

self.Patch_Margin = 4

62

63

# The number of bits in an int.

64

# Python has no maximum, thus to disable patch splitting set to 0.

65

# However to avoid long patches in certain pathological cases, use 32.

66

# Multiple short patches (using native ints) are much faster than long ones.

67

self.Match_MaxBits = 32

68

69

# DIFF FUNCTIONS

746

70

747

Args:

748

one: First string.

749

two: Second string.

71

# The data structure representing a diff is an array of tuples:

72

# [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]

73

# which means: delete "Hello", add "Goodbye" and keep " world."

74

DIFF_DELETE = -1

75

DIFF_INSERT = 1

76

DIFF_EQUAL = 0

77

78

def diff_main(self, text1, text2, checklines=True, deadline=None):

79

"""Find the differences between two texts. Simplifies the problem by

80

stripping any common prefix or suffix off the texts before diffing.

81

82

Args:

83

text1: Old string to be diffed.

84

text2: New string to be diffed.

85

checklines: Optional speedup flag. If present and false, then don't run

86

a line-level diff first to identify the changed areas.

87

Defaults to true, which does a faster, slightly less optimal diff.

88

deadline: Optional time when the diff should be complete by. Used

89

internally for recursive calls. Users should set DiffTimeout instead.

90

91

Returns:

92

Array of changes.

93

"""

94

# Set a deadline by which time the diff must be complete.

95

if deadline is None:

96

# Unlike in most languages, Python counts time in seconds.

97

if self.Diff_Timeout <= 0:

98

deadline = sys.maxsize

99

else:

100

deadline = time.time() + self.Diff_Timeout

101

102

# Check for null inputs.

103

if text1 is None or text2 is None:

104

raise ValueError("Null inputs. (diff_main)")

750

105

751

Returns:

752

The score.

753

"""

754

if not one or not two:

755

# Edges are the best.

756

return 6

106

# Check for equality (speedup).

107

if text1 == text2:

108

if text1:

109

return [(self.DIFF_EQUAL, text1)]

110

return []

111

112

# Trim off common prefix (speedup).

113

commonlength = self.diff_commonPrefix(text1, text2)

114

commonprefix = text1[:commonlength]

115

text1 = text1[commonlength:]

116

text2 = text2[commonlength:]

117

118

# Trim off common suffix (speedup).

119

commonlength = self.diff_commonSuffix(text1, text2)

120

if commonlength == 0:

121

commonsuffix = ""

122

else:

123

commonsuffix = text1[-commonlength:]

124

text1 = text1[:-commonlength]

125

text2 = text2[:-commonlength]

126

127

# Compute the diff on the middle block.

128

diffs = self.diff_compute(text1, text2, checklines, deadline)

129

130

# Restore the prefix and suffix.

131

if commonprefix:

132

diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]

133

if commonsuffix:

134

diffs.append((self.DIFF_EQUAL, commonsuffix))

135

self.diff_cleanupMerge(diffs)

136

return diffs

137

138

def diff_compute(self, text1, text2, checklines, deadline):

139

"""Find the differences between two texts. Assumes that the texts do not

140

have any common prefix or suffix.

757

141

758

# Each port of this function behaves slightly differently due to

759

# subtle differences in each language's definition of things like

760

# 'whitespace'. Since this function's purpose is largely cosmetic,

761

# the choice has been made to use each language's native features

762

# rather than force total conformity.

763

char1 = one[-1]

764

char2 = two[0]

765

nonAlphaNumeric1 = not char1.isalnum()

766

nonAlphaNumeric2 = not char2.isalnum()

767

whitespace1 = nonAlphaNumeric1 and char1.isspace()

768

whitespace2 = nonAlphaNumeric2 and char2.isspace()

769

lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")

770

lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")

771

blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)

772

blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)

142

Args:

143

text1: Old string to be diffed.

144

text2: New string to be diffed.

145

checklines: Speedup flag. If false, then don't run a line-level diff

146

first to identify the changed areas.

147

If true, then run a faster, slightly less optimal diff.

148

deadline: Time when the diff should be complete by.

149

150

Returns:

151

Array of changes.

152

"""

153

if not text1:

154

# Just add some text (speedup).

155

return [(self.DIFF_INSERT, text2)]

156

157

if not text2:

158

# Just delete some text (speedup).

159

return [(self.DIFF_DELETE, text1)]

160

161

if len(text1) > len(text2):

162

(longtext, shorttext) = (text1, text2)

163

else:

164

(shorttext, longtext) = (text1, text2)

165

i = longtext.find(shorttext)

166

if i != -1:

167

# Shorter text is inside the longer text (speedup).

168

diffs = [

169

(self.DIFF_INSERT, longtext[:i]),

170

(self.DIFF_EQUAL, shorttext),

171

(self.DIFF_INSERT, longtext[i + len(shorttext) :]),

172

]

173

# Swap insertions for deletions if diff is reversed.

174

if len(text1) > len(text2):

175

diffs[0] = (self.DIFF_DELETE, diffs[0][1])

176

diffs[2] = (self.DIFF_DELETE, diffs[2][1])

177

return diffs

773

178

774

if blankLine1 or blankLine2:

775

# Five points for blank lines.

776

return 5

777

elif lineBreak1 or lineBreak2:

778

# Four points for line breaks.

779

return 4

780

elif nonAlphaNumeric1 and not whitespace1 and whitespace2:

781

# Three points for end of sentences.

782

return 3

783

elif whitespace1 or whitespace2:

784

# Two points for whitespace.

785

return 2

786

elif nonAlphaNumeric1 or nonAlphaNumeric2:

787

# One point for non-alphanumeric.

788

return 1

789

return 0

179

if len(shorttext) == 1:

180

# Single character string.

181

# After the previous speedup, the character can't be an equality.

182

return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]

183

184

# Check to see if the problem can be split in two.

185

hm = self.diff_halfMatch(text1, text2)

186

if hm:

187

# A half-match was found, sort out the return data.

188

(text1_a, text1_b, text2_a, text2_b, mid_common) = hm

189

# Send both pairs off for separate processing.

190

diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)

191

diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)

192

# Merge the results.

193

return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b

194

195

if checklines and len(text1) > 100 and len(text2) > 100:

196

return self.diff_lineMode(text1, text2, deadline)

197

198

return self.diff_bisect(text1, text2, deadline)

199

200

def diff_lineMode(self, text1, text2, deadline):

201

"""Do a quick line-level diff on both strings, then rediff the parts for

202

greater accuracy.

203

This speedup can produce non-minimal diffs.

204

205

Args:

206

text1: Old string to be diffed.

207

text2: New string to be diffed.

208

deadline: Time when the diff should be complete by.

209

210

Returns:

211

Array of changes.

212

"""

790

213

791

pointer = 1

792

# Intentionally ignore the first and last element (don't need checking).

793

while pointer < len(diffs) - 1:

794

if (diffs[pointer - 1][0] == self.DIFF_EQUAL and

795

diffs[pointer + 1][0] == self.DIFF_EQUAL):

796

# This is a single edit surrounded by equalities.

797

equality1 = diffs[pointer - 1][1]

798

edit = diffs[pointer][1]

799

equality2 = diffs[pointer + 1][1]

214

# Scan the text on a line-by-line basis first.

215

(text1, text2, linearray) = self.diff_linesToChars(text1, text2)

216

217

diffs = self.diff_main(text1, text2, False, deadline)

218

219

# Convert the diff back to original text.

220

self.diff_charsToLines(diffs, linearray)

221

# Eliminate freak matches (e.g. blank lines)

222

self.diff_cleanupSemantic(diffs)

223

224

# Rediff any replacement blocks, this time character-by-character.

225

# Add a dummy entry at the end.

226

diffs.append((self.DIFF_EQUAL, ""))

227

pointer = 0

228

count_delete = 0

229

count_insert = 0

230

text_delete = ""

231

text_insert = ""

232

while pointer < len(diffs):

233

if diffs[pointer][0] == self.DIFF_INSERT:

234

count_insert += 1

235

text_insert += diffs[pointer][1]

236

elif diffs[pointer][0] == self.DIFF_DELETE:

237

count_delete += 1

238

text_delete += diffs[pointer][1]

239

elif diffs[pointer][0] == self.DIFF_EQUAL:

240

# Upon reaching an equality, check for prior redundancies.

241

if count_delete >= 1 and count_insert >= 1:

242

# Delete the offending records and add the merged ones.

243

a = self.diff_main(text_delete, text_insert, False, deadline)

244

diffs[pointer - count_delete - count_insert : pointer] = a

245

pointer = pointer - count_delete - count_insert + len(a)

246

count_insert = 0

247

count_delete = 0

248

text_delete = ""

249

text_insert = ""

800

250

801

# First, shift the edit as far left as possible.

802

commonOffset = self.diff_commonSuffix(equality1, edit)

803

if commonOffset:

804

commonString = edit[-commonOffset:]

805

equality1 = equality1[:-commonOffset]

806

edit = commonString + edit[:-commonOffset]

807

equality2 = commonString + equality2

251

pointer += 1

252

253

diffs.pop() # Remove the dummy entry at the end.

254

255

return diffs

256

257

def diff_bisect(self, text1, text2, deadline):

258

"""Find the 'middle snake' of a diff, split the problem in two

259

and return the recursively constructed diff.

260

See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.

261

262

Args:

263

text1: Old string to be diffed.

264

text2: New string to be diffed.

265

deadline: Time at which to bail if not yet complete.

266

267

Returns:

268

Array of diff tuples.

269

"""

808

270

809

# Second, step character by character right, looking for the best fit.

810

bestEquality1 = equality1

811

bestEdit = edit

812

bestEquality2 = equality2

813

bestScore = (diff_cleanupSemanticScore(equality1, edit) +

814

diff_cleanupSemanticScore(edit, equality2))

815

while edit and equality2 and edit[0] == equality2[0]:

816

equality1 += edit[0]

817

edit = edit[1:] + equality2[0]

818

equality2 = equality2[1:]

819

score = (diff_cleanupSemanticScore(equality1, edit) +

820

diff_cleanupSemanticScore(edit, equality2))

821

# The >= encourages trailing rather than leading whitespace on edits.

822

if score >= bestScore:

823

bestScore = score

824

bestEquality1 = equality1

825

bestEdit = edit

826

bestEquality2 = equality2

271

# Cache the text lengths to prevent multiple calls.

272

text1_length = len(text1)

273

text2_length = len(text2)

274

max_d = (text1_length + text2_length + 1) // 2

275

v_offset = max_d

276

v_length = 2 * max_d

277

v1 = [-1] * v_length

278

v1[v_offset + 1] = 0

279

v2 = v1[:]

280

delta = text1_length - text2_length

281

# If the total number of characters is odd, then the front path will

282

# collide with the reverse path.

283

front = delta % 2 != 0

284

# Offsets for start and end of k loop.

285

# Prevents mapping of space beyond the grid.

286

k1start = 0

287

k1end = 0

288

k2start = 0

289

k2end = 0

290

for d in range(max_d):

291

# Bail out if deadline is reached.

292

if time.time() > deadline:

293

break

294

295

# Walk the front path one step.

296

for k1 in range(-d + k1start, d + 1 - k1end, 2):

297

k1_offset = v_offset + k1

298

if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]):

299

x1 = v1[k1_offset + 1]

300

else:

301

x1 = v1[k1_offset - 1] + 1

302

y1 = x1 - k1

303

while (

304

x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1]

305

):

306

x1 += 1

307

y1 += 1

308

v1[k1_offset] = x1

309

if x1 > text1_length:

310

# Ran off the right of the graph.

311

k1end += 2

312

elif y1 > text2_length:

313

# Ran off the bottom of the graph.

314

k1start += 2

315

elif front:

316

k2_offset = v_offset + delta - k1

317

if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:

318

# Mirror x2 onto top-left coordinate system.

319

x2 = text1_length - v2[k2_offset]

320

if x1 >= x2:

321

# Overlap detected.

322

return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

827

323

828

if diffs[pointer - 1][1] != bestEquality1:

829

# We have an improvement, save it back to the diff.

830

if bestEquality1:

831

diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)

832

else:

833

del diffs[pointer - 1]

834

pointer -= 1

835

diffs[pointer] = (diffs[pointer][0], bestEdit)

836

if bestEquality2:

837

diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)

838

else:

839

del diffs[pointer + 1]

840

~~pointer~~ -= 1

841

pointer += 1

324

# Walk the reverse path one step.

325

for k2 in range(-d + k2start, d + 1 - k2end, 2):

326

k2_offset = v_offset + k2

327

if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]):

328

x2 = v2[k2_offset + 1]

329

else:

330

x2 = v2[k2_offset - 1] + 1

331

y2 = x2 - k2

332

while (

333

x2 < text1_length

334

and y2 < text2_length

335

and text1[-x2 - 1] == text2[-y2 - 1]

336

):

337

x2 += 1

338

y2 += 1

339

v2[k2_offset] = x2

340

if x2 > text1_length:

341

# Ran off the left of the graph.

342

k2end += 2

343

elif y2 > text2_length:

344

# Ran off the top of the graph.

345

k2start += 2

346

elif not front:

347

k1_offset = v_offset + delta - k2

348

if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:

349

x1 = v1[k1_offset]

350

y1 = v_offset + x1 - k1_offset

351

# Mirror x2 onto top-left coordinate system.

352

x2 = text1_length - x2

353

if x1 >= x2:

354

# Overlap detected.

355

return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

356

357

# Diff took too long and hit the deadline or

358

# number of diffs equals number of characters, no commonality at all.

359

return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]

360

361

def diff_bisectSplit(self, text1, text2, x, y, deadline):

362

"""Given the location of the 'middle snake', split the diff in two parts

363

and recurse.

364

365

Args:

366

text1: Old string to be diffed.

367

text2: New string to be diffed.

368

x: Index of split point in text1.

369

y: Index of split point in text2.

370

deadline: Time at which to bail if not yet complete.

371

372

Returns:

373

Array of diff tuples.

374

"""

375

text1a = text1[:x]

376

text2a = text2[:y]

377

text1b = text1[x:]

378

text2b = text2[y:]

379

380

# Compute both diffs serially.

381

diffs = self.diff_main(text1a, text2a, False, deadline)

382

diffsb = self.diff_main(text1b, text2b, False, deadline)

842

383

843

# Define some regex patterns for matching boundaries.

844

BLANKLINEEND = re.compile(r"\n\r?\n$");

845

BLANKLINESTART = re.compile(r"^\r?\n\r?\n");

384

return diffs + diffsb

385

386

def diff_linesToChars(self, text1, text2):

387

"""Split two texts into an array of strings. Reduce the texts to a string

388

of hashes where each Unicode character represents one line.

389

390

Args:

391

text1: First string.

392

text2: Second string.

393

394

Returns:

395

Three element tuple, containing the encoded text1, the encoded text2 and

396

the array of unique strings. The zeroth element of the array of unique

397

strings is intentionally blank.

398

"""

399

lineArray = [] # e.g. lineArray[4] == "Hello\n"

400

lineHash = {} # e.g. lineHash["Hello\n"] == 4

401

402

# "\x00" is a valid character, but various debuggers don't like it.

403

# So we'll insert a junk entry to avoid generating a null character.

404

lineArray.append("")

405

406

def diff_linesToCharsMunge(text):

407

"""Split a text into an array of strings. Reduce the texts to a string

408

of hashes where each Unicode character represents one line.

409

Modifies linearray and linehash through being a closure.

410

411

Args:

412

text: String to encode.

413

414

Returns:

415

Encoded string.

416

"""

417

chars = []

418

# Walk the text, pulling out a substring for each line.

419

# text.split('\n') would would temporarily double our memory footprint.

420

# Modifying text would create many large strings to garbage collect.

421

lineStart = 0

422

lineEnd = -1

423

while lineEnd < len(text) - 1:

424

lineEnd = text.find("\n", lineStart)

425

if lineEnd == -1:

426

lineEnd = len(text) - 1

427

line = text[lineStart : lineEnd + 1]

428

lineStart = lineEnd + 1

429

430

if line in lineHash:

431

chars.append(chr(lineHash[line]))

432

else:

433

lineArray.append(line)

434

lineHash[line] = len(lineArray) - 1

435

chars.append(chr(len(lineArray) - 1))

436

return "".join(chars)

437

438

chars1 = diff_linesToCharsMunge(text1)

439

chars2 = diff_linesToCharsMunge(text2)

440

return (chars1, chars2, lineArray)

441

442

def diff_charsToLines(self, diffs, lineArray):

443

"""Rehydrate the text in a diff from a string of line hashes to real lines

444

of text.

846

445

847

def diff_cleanupEfficiency(self, diffs):

848

"""Reduce the number of edits by eliminating operationally trivial

849

equalities.

446

Args:

447

diffs: Array of diff tuples.

448

lineArray: Array of unique strings.

449

"""

450

for x in range(len(diffs)):

451

text = []

452

for char in diffs[x][1]:

453

text.append(lineArray[ord(char)])

454

diffs[x] = (diffs[x][0], "".join(text))

455

456

def diff_commonPrefix(self, text1, text2):

457

"""Determine the common prefix of two strings.

458

459

Args:

460

text1: First string.

461

text2: Second string.

462

463

Returns:

464

The number of characters common to the start of each string.

465

"""

466

# Quick check for common null cases.

467

if not text1 or not text2 or text1[0] != text2[0]:

468

return 0

469

# Binary search.

470

# Performance analysis: http://neil.fraser.name/news/2007/10/09/

471

pointermin = 0

472

pointermax = min(len(text1), len(text2))

473

pointermid = pointermax

474

pointerstart = 0

475

while pointermin < pointermid:

476

if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:

477

pointermin = pointermid

478

pointerstart = pointermin

479

else:

480

pointermax = pointermid

481

pointermid = (pointermax - pointermin) // 2 + pointermin

482

return pointermid

483

484

def diff_commonSuffix(self, text1, text2):

485

"""Determine the common suffix of two strings.

486

487

Args:

488

text1: First string.

489

text2: Second string.

850

490

851

Args:

852

diffs: Array of diff tuples.

853

"""

854

changes = False

855

equalities = [] # Stack of indices where equalities are found.

856

lastequality = None # Always equal to diffs[equalities[-1]][1]

857

pointer = 0 # Index of current position.

858

pre_ins = False # Is there an insertion operation before the last equality.

859

pre_del = False # Is there a deletion operation before the last equality.

860

post_ins = False # Is there an insertion operation after the last equality.

861

post_del = False # Is there a deletion operation after the last equality.

862

while pointer < len(diffs):

863

if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.

864

if (len(diffs[pointer][1]) < self.Diff_EditCost and

865

(post_ins or post_del)):

866

# Candidate found.

867

equalities.append(pointer)

868

pre_ins = post_ins

869

~~pre_del~~ = po~~st_del~~

870

lastequality = diffs[pointer][1]

491

Returns:

492

The number of characters common to the end of each string.

493

"""

494

# Quick check for common null cases.

495

if not text1 or not text2 or text1[-1] != text2[-1]:

496

return 0

497

# Binary search.

498

# Performance analysis: http://neil.fraser.name/news/2007/10/09/

499

pointermin = 0

500

pointermax = min(len(text1), len(text2))

501

pointermid = pointermax

502

pointerend = 0

503

while pointermin < pointermid:

504

if (

505

text1[-pointermid : len(text1) - pointerend]

506

== text2[-pointermid : len(text2) - pointerend]

507

):

508

pointermin = pointermid

509

pointerend = pointermin

510

else:

511

pointermax = pointermid

512

pointermid = (pointermax - pointermin) // 2 + pointermin

513

return pointermid

514

515

def diff_commonOverlap(self, text1, text2):

516

"""Determine if the suffix of one string is the prefix of another.

517

518

Args:

519

text1 First string.

520

text2 Second string.

521

522

Returns:

523

The number of characters common to the end of the first

524

string and the start of the second string.

525

"""

526

# Cache the text lengths to prevent multiple calls.

527

text1_length = len(text1)

528

text2_length = len(text2)

529

# Eliminate the null case.

530

if text1_length == 0 or text2_length == 0:

531

return 0

532

# Truncate the longer string.

533

if text1_length > text2_length:

534

text1 = text1[-text2_length:]

535

elif text1_length < text2_length:

536

text2 = text2[:text1_length]

537

text_length = min(text1_length, text2_length)

538

# Quick check for the worst case.

539

if text1 == text2:

540

return text_length

541

542

# Start by looking for a single character match

543

# and increase length until no match is found.

544

# Performance analysis: http://neil.fraser.name/news/2010/11/04/

545

best = 0

546

length = 1

547

while True:

548

pattern = text1[-length:]

549

found = text2.find(pattern)

550

if found == -1:

551

return best

552

length += found

553

if found == 0 or text1[-length:] == text2[:length]:

554

best = length

555

length += 1

556

557

def diff_halfMatch(self, text1, text2):

558

"""Do the two texts share a substring which is at least half the length of

559

the longer text?

560

This speedup can produce non-minimal diffs.

561

562

Args:

563

text1: First string.

564

text2: Second string.

565

566

Returns:

567

Five element Array, containing the prefix of text1, the suffix of text1,

568

the prefix of text2, the suffix of text2 and the common middle. Or None

569

if there was no match.

570

"""

571

if self.Diff_Timeout <= 0:

572

# Don't risk returning a non-optimal diff if we have unlimited time.

573

return None

574

if len(text1) > len(text2):

575

(longtext, shorttext) = (text1, text2)

871

576

else:

872

# Not a candidate, and can never become one.

873

equalities = []

874

lastequality = None

577

(shorttext, longtext) = (text1, text2)

578

if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):

579

return None # Pointless.

875

580

876

post_ins = post_del = False

877

else: # An insertion or deletion.

878

if diffs[pointer][0] == self.DIFF_DELETE:

879

post_del = True

880

else:

881

post_ins = True

581

def diff_halfMatchI(longtext, shorttext, i):

582

"""Does a substring of shorttext exist within longtext such that the

583

substring is at least half the length of longtext?

584

Closure, but does not reference any external variables.

882

585

883

# Five types to be split:

884

# <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>

885

# <ins>A</ins>X<ins>C</ins><del>D</del>

886

# <ins>A</ins><del>B</del>X<ins>C</ins>

887

# <ins>A</del>X<ins>C</ins><del>D</del>

888

# <ins>A</ins><del>B</del>X<del>C</del>

586

Args:

587

longtext: Longer string.

588

shorttext: Shorter string.

589

i: Start index of quarter length substring within longtext.

889

590

890

if lastequality and ((pre_ins and pre_del and post_ins and post_del) or

891

((len(lastequality) < self.Diff_EditCost / 2) and

892

(pre_ins + pre_del + post_ins + post_del) == 3)):

893

# Duplicate record.

894

diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))

895

# Change second copy to insert.

896

diffs[equalities[-1] + 1] = (self.DIFF_INSERT,

897

diffs[equalities[-1] + 1][1])

898

equalities.pop() # Throw away the equality we just deleted.

899

lastequality = None

900

if pre_ins and pre_del:

901

# No changes made which could affect previous entry, keep going.

902

post_ins = post_del = True

903

equalities = []

904

else:

905

if len(equalities):

906

equalities.pop() # Throw away the previous equality.

907

if len(equalities):

908

pointer = equalities[-1]

909

else:

910

pointer = -1

911

post_ins = post_del = False

912

changes = True

913

pointer += 1

914

915

if changes:

916

self.diff_cleanupMerge(diffs)

917

918

def diff_cleanupMerge(self, diffs):

919

"""Reorder and merge like edit sections. Merge equalities.

920

Any edit section can move as long as it doesn't cross an equality.

591

Returns:

592

Five element Array, containing the prefix of longtext, the suffix of

593

longtext, the prefix of shorttext, the suffix of shorttext and the

594

common middle. Or None if there was no match.

595

"""

596

seed = longtext[i : i + len(longtext) // 4]

597

best_common = ""

598

j = shorttext.find(seed)

599

while j != -1:

600

prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])

601

suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])

602

if len(best_common) < suffixLength + prefixLength:

603

best_common = (

604

shorttext[j - suffixLength : j]

605

+ shorttext[j : j + prefixLength]

606

)

607

best_longtext_a = longtext[: i - suffixLength]

608

best_longtext_b = longtext[i + prefixLength :]

609

best_shorttext_a = shorttext[: j - suffixLength]

610

best_shorttext_b = shorttext[j + prefixLength :]

611

j = shorttext.find(seed, j + 1)

921

612

922

Args:

923

diffs: Array of diff tuples.

924

"""

925

diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.

926

pointer = 0

927

count_delete = 0

928

count_insert = 0

929

text_delete = ''

930

text_insert = ''

931

while pointer < len(diffs):

932

if diffs[pointer][0] == self.DIFF_INSERT:

933

count_insert += 1

934

text_insert += diffs[pointer][1]

935

pointer += 1

936

elif diffs[pointer][0] == self.DIFF_DELETE:

937

count_delete += 1

938

text_delete += diffs[pointer][1]

939

pointer += 1

940

elif diffs[pointer][0] == self.DIFF_EQUAL:

941

# Upon reaching an equality, check for prior redundancies.

942

if count_delete + count_insert > 1:

943

if count_delete != 0 and count_insert != 0:

944

# Factor out any common prefixies.

945

commonlength = self.diff_commonPrefix(text_insert, text_delete)

946

if commonlength != 0:

947

x = pointer - count_delete - count_insert - 1

948

if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:

949

diffs[x] = (diffs[x][0], diffs[x][1] +

950

text_insert[:commonlength])

951

else:

952

diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))

613

if len(best_common) * 2 >= len(longtext):

614

return (

615

best_longtext_a,

616

best_longtext_b,

617

best_shorttext_a,

618

best_shorttext_b,

619

best_common,

620

)

621

else:

622

return None

623

624

# First check if the second quarter is the seed for a half-match.

625

hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)

626

# Check again based on the third quarter.

627

hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)

628

if not hm1 and not hm2:

629

return None

630

elif not hm2:

631

hm = hm1

632

elif not hm1:

633

hm = hm2

634

else:

635

# Both matched. Select the longest.

636

if len(hm1[4]) > len(hm2[4]):

637

hm = hm1

638

else:

639

hm = hm2

640

641

# A half-match was found, sort out the return data.

642

if len(text1) > len(text2):

643

(text1_a, text1_b, text2_a, text2_b, mid_common) = hm

644

else:

645

(text2_a, text2_b, text1_a, text1_b, mid_common) = hm

646

return (text1_a, text1_b, text2_a, text2_b, mid_common)

647

648

def diff_cleanupSemantic(self, diffs):

649

"""Reduce the number of edits by eliminating semantically trivial

650

equalities.

651

652

Args:

653

diffs: Array of diff tuples.

654

"""

655

changes = False

656

equalities = [] # Stack of indices where equalities are found.

657

lastequality = None # Always equal to diffs[equalities[-1]][1]

658

pointer = 0 # Index of current position.

659

# Number of chars that changed prior to the equality.

660

length_insertions1, length_deletions1 = 0, 0

661

# Number of chars that changed after the equality.

662

length_insertions2, length_deletions2 = 0, 0

663

while pointer < len(diffs):

664

if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.

665

equalities.append(pointer)

666

length_insertions1, length_insertions2 = length_insertions2, 0

667

length_deletions1, length_deletions2 = length_deletions2, 0

668

lastequality = diffs[pointer][1]

669

else: # An insertion or deletion.

670

if diffs[pointer][0] == self.DIFF_INSERT:

671

length_insertions2 += len(diffs[pointer][1])

672

else:

673

length_deletions2 += len(diffs[pointer][1])

674

# Eliminate an equality that is smaller or equal to the edits on both

675

# sides of it.

676

if (

677

lastequality

678

and (

679

len(lastequality) <= max(length_insertions1, length_deletions1)

680

)

681

and (

682

len(lastequality) <= max(length_insertions2, length_deletions2)

683

)

684

):

685

# Duplicate record.

686

diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))

687

# Change second copy to insert.

688

diffs[equalities[-1] + 1] = (

689

self.DIFF_INSERT,

690

diffs[equalities[-1] + 1][1],

691

)

692

# Throw away the equality we just deleted.

693

equalities.pop()

694

# Throw away the previous equality (it needs to be reevaluated).

695

if len(equalities):

696

equalities.pop()

697

if len(equalities):

698

pointer = equalities[-1]

699

else:

700

pointer = -1

701

# Reset the counters.

702

length_insertions1, length_deletions1 = 0, 0

703

length_insertions2, length_deletions2 = 0, 0

704

lastequality = None

705

changes = True

706

pointer += 1

707

708

# Normalize the diff.

709

if changes:

710

self.diff_cleanupMerge(diffs)

711

self.diff_cleanupSemanticLossless(diffs)

712

713

# Find any overlaps between deletions and insertions.

714

# e.g: <del>abcxxx</del><ins>xxxdef</ins>

715

# -> <del>abc</del>xxx<ins>def</ins>

716

# e.g: <del>xxxabc</del><ins>defxxx</ins>

717

# -> <ins>def</ins>xxx<del>abc</del>

718

# Only extract an overlap if it is as big as the edit ahead or behind it.

719

pointer = 1

720

while pointer < len(diffs):

721

if (

722

diffs[pointer - 1][0] == self.DIFF_DELETE

723

and diffs[pointer][0] == self.DIFF_INSERT

724

):

725

deletion = diffs[pointer - 1][1]

726

insertion = diffs[pointer][1]

727

overlap_length1 = self.diff_commonOverlap(deletion, insertion)

728

overlap_length2 = self.diff_commonOverlap(insertion, deletion)

729

if overlap_length1 >= overlap_length2:

730

if (

731

overlap_length1 >= len(deletion) / 2.0

732

or overlap_length1 >= len(insertion) / 2.0

733

):

734

# Overlap found. Insert an equality and trim the surrounding edits.

735

diffs.insert(

736

pointer, (self.DIFF_EQUAL, insertion[:overlap_length1])

737

)

738

diffs[pointer - 1] = (

739

self.DIFF_DELETE,

740

deletion[: len(deletion) - overlap_length1],

741

)

742

diffs[pointer + 1] = (

743

self.DIFF_INSERT,

744

insertion[overlap_length1:],

745

)

746

pointer += 1

747

else:

748

if (

749

overlap_length2 >= len(deletion) / 2.0

750

or overlap_length2 >= len(insertion) / 2.0

751

):

752

# Reverse overlap found.

753

# Insert an equality and swap and trim the surrounding edits.

754

diffs.insert(

755

pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])

756

)

757

diffs[pointer - 1] = (

758

self.DIFF_INSERT,

759

insertion[: len(insertion) - overlap_length2],

760

)

761

diffs[pointer + 1] = (

762

self.DIFF_DELETE,

763

deletion[overlap_length2:],

764

)

765

pointer += 1

953

766

pointer += 1

954

text_insert = text_insert[commonlength:]

955

text_delete = text_delete[commonlength:]

956

# Factor out any common suffixies.

957

commonlength = self.diff_commonSuffix(text_insert, text_delete)

958

if commonlength != 0:

959

diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +

960

diffs[pointer][1])

961

text_insert = text_insert[:-commonlength]

962

text_delete = text_delete[:-commonlength]

963

# Delete the offending records and add the merged ones.

964

if count_delete == 0:

965

diffs[pointer - count_insert : pointer] = [

966

(self.DIFF_INSERT, text_insert)]

967

elif count_insert == 0:

968

diffs[pointer - count_delete : pointer] = [

969

(self.DIFF_DELETE, text_delete)]

970

else:

971

diffs[pointer - count_delete - count_insert : pointer] = [

972

(self.DIFF_DELETE, text_delete),

973

(self.DIFF_INSERT, text_insert)]

974

pointer = pointer - count_delete - count_insert + 1

975

if count_delete != 0:

976

767

pointer += 1

977

if count_insert != 0:

978

pointer += 1

979

elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:

980

# Merge this equality with the previous one.

981

diffs[pointer - 1] = (diffs[pointer - 1][0],

982

diffs[pointer - 1][1] + diffs[pointer][1])

983

del diffs[pointer]

984

else:

985

pointer += 1

768

769

def diff_cleanupSemanticLossless(self, diffs):

770

"""Look for single edits surrounded on both sides by equalities

771

which can be shifted sideways to align the edit to a word boundary.

772

e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.

773

774

Args:

775

diffs: Array of diff tuples.

776

"""

777

778

def diff_cleanupSemanticScore(one, two):

779

"""Given two strings, compute a score representing whether the

780

internal boundary falls on logical boundaries.

781

Scores range from 6 (best) to 0 (worst).

782

Closure, but does not reference any external variables.

783

784

Args:

785

one: First string.

786

two: Second string.

787

788

Returns:

789

The score.

790

"""

791

if not one or not two:

792

# Edges are the best.

793

return 6

794

795

# Each port of this function behaves slightly differently due to

796

# subtle differences in each language's definition of things like

797

# 'whitespace'. Since this function's purpose is largely cosmetic,

798

# the choice has been made to use each language's native features

799

# rather than force total conformity.

800

char1 = one[-1]

801

char2 = two[0]

802

nonAlphaNumeric1 = not char1.isalnum()

803

nonAlphaNumeric2 = not char2.isalnum()

804

whitespace1 = nonAlphaNumeric1 and char1.isspace()

805

whitespace2 = nonAlphaNumeric2 and char2.isspace()

806

lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")

807

lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")

808

blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)

809

blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)

810

811

if blankLine1 or blankLine2:

812

# Five points for blank lines.

813

return 5

814

elif lineBreak1 or lineBreak2:

815

# Four points for line breaks.

816

return 4

817

elif nonAlphaNumeric1 and not whitespace1 and whitespace2:

818

# Three points for end of sentences.

819

return 3

820

elif whitespace1 or whitespace2:

821

# Two points for whitespace.

822

return 2

823

elif nonAlphaNumeric1 or nonAlphaNumeric2:

824

# One point for non-alphanumeric.

825

return 1

826

return 0

986

827

987

~~count_insert~~ = 0

988

count_delete = 0

989

text_delete = ''

990

text_insert = ''

828

pointer = 1

829

# Intentionally ignore the first and last element (don't need checking).

830

while pointer < len(diffs) - 1:

831

if (

832

diffs[pointer - 1][0] == self.DIFF_EQUAL

833

and diffs[pointer + 1][0] == self.DIFF_EQUAL

834

):

835

# This is a single edit surrounded by equalities.

836

equality1 = diffs[pointer - 1][1]

837

edit = diffs[pointer][1]

838

equality2 = diffs[pointer + 1][1]

991

839

992

if diffs[-1][1] == '':

993

diffs.pop() # Remove the dummy entry at the end.

840

# First, shift the edit as far left as possible.

841

commonOffset = self.diff_commonSuffix(equality1, edit)

842

if commonOffset:

843

commonString = edit[-commonOffset:]

844

equality1 = equality1[:-commonOffset]

845

edit = commonString + edit[:-commonOffset]

846

equality2 = commonString + equality2

994

847

995

# Second pass: look for single edits surrounded on both sides by equalities

996

# which can be shifted sideways to eliminate an equality.

997

# e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC

998

changes = False

999

pointer = 1

1000

# Intentionally ignore the first and last element (don't need checking).

1001

while pointer < len(diffs) - 1:

1002

if (diffs[pointer - 1][0] == self.DIFF_EQUAL and

1003

diffs[pointer + 1][0] == self.DIFF_EQUAL):

1004

# This is a single edit surrounded by equalities.

1005

if diffs[pointer][1].endswith(diffs[pointer - 1][1]):

1006

# Shift the edit over the previous equality.

1007

diffs[pointer] = (diffs[pointer][0],

1008

diffs[pointer - 1][1] +

1009

diffs[pointer][1][:-len(diffs[pointer - 1][1])])

1010

diffs[pointer + 1] = (diffs[pointer + 1][0],

1011

diffs[pointer - 1][1] + diffs[pointer + 1][1])

1012

del diffs[pointer - 1]

1013

changes = True

1014

elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):

1015

# Shift the edit over the next equality.

1016

~~diffs~~[~~pointer~~ - 1] = (diffs[pointer - 1][0],

1017

diffs[pointer - 1][1] + diffs[pointer + 1][1])

1018

diffs[pointer] = (diffs[pointer][0],

1019

diffs[pointer][1][~~len~~(diffs[pointer + 1][1]):] +

1020

diffs[pointer + 1][1])

1021

del diffs[pointer + 1]

1022

changes = True

1023

pointer += 1

848

# Second, step character by character right, looking for the best fit.

849

bestEquality1 = equality1

850

bestEdit = edit

851

bestEquality2 = equality2

852

bestScore = diff_cleanupSemanticScore(

853

equality1, edit

854

) + diff_cleanupSemanticScore(edit, equality2)

855

while edit and equality2 and edit[0] == equality2[0]:

856

equality1 += edit[0]

857

edit = edit[1:] + equality2[0]

858

equality2 = equality2[1:]

859

score = diff_cleanupSemanticScore(

860

equality1, edit

861

) + diff_cleanupSemanticScore(edit, equality2)

862

# The >= encourages trailing rather than leading whitespace on edits.

863

if score >= bestScore:

864

bestScore = score

865

bestEquality1 = equality1

866

bestEdit = edit

867

bestEquality2 = equality2

868

869

if diffs[pointer - 1][1] != bestEquality1:

870

# We have an improvement, save it back to the diff.

871

if bestEquality1:

872

diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)

873

else:

874

del diffs[pointer - 1]

875

pointer -= 1

876

diffs[pointer] = (diffs[pointer][0], bestEdit)

877

if bestEquality2:

878

diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)

879

else:

880

del diffs[pointer + 1]

881

pointer -= 1

882

pointer += 1

883

884

# Define some regex patterns for matching boundaries.

885

BLANKLINEEND = re.compile(r"\n\r?\n$")

886

BLANKLINESTART = re.compile(r"^\r?\n\r?\n")

887

888

def diff_cleanupEfficiency(self, diffs):

889

"""Reduce the number of edits by eliminating operationally trivial

890

equalities.

1024

891

1025

# If shifts were made, the diff needs reordering and another shift sweep.

1026

if changes:

1027

self.diff_cleanupMerge(diffs)

892

Args:

893

diffs: Array of diff tuples.

894

"""

895

changes = False

896

equalities = [] # Stack of indices where equalities are found.

897

lastequality = None # Always equal to diffs[equalities[-1]][1]

898

pointer = 0 # Index of current position.

899

pre_ins = False # Is there an insertion operation before the last equality.

900

pre_del = False # Is there a deletion operation before the last equality.

901

post_ins = False # Is there an insertion operation after the last equality.

902

post_del = False # Is there a deletion operation after the last equality.

903

while pointer < len(diffs):

904

if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.

905

if len(diffs[pointer][1]) < self.Diff_EditCost and (

906

post_ins or post_del

907

):

908

# Candidate found.

909

equalities.append(pointer)

910

pre_ins = post_ins

911

pre_del = post_del

912

lastequality = diffs[pointer][1]

913

else:

914

# Not a candidate, and can never become one.

915

equalities = []

916

lastequality = None

1028

917

1029

def diff_xIndex(self, diffs, loc):

1030

"""loc is a location in text1, compute and return the equivalent location

1031

in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8

1032

1033

Args:

1034

diffs: Array of diff tuples.

1035

loc: Location within text1.

918

post_ins = post_del = False

919

else: # An insertion or deletion.

920

if diffs[pointer][0] == self.DIFF_DELETE:

921

post_del = True

922

else:

923

post_ins = True

1036

924

1037

Returns:

1038

Location within text2.

1039

"""

1040

chars1 = 0

1041

chars2 = 0

1042

last_chars1 = 0

1043

last_chars2 = 0

1044

for x in range(len(diffs)):

1045

(op, text) = diffs[x]

1046

if op != self.DIFF_INSERT: # Equality or deletion.

1047

chars1 += len(text)

1048

if op != self.DIFF_DELETE: # Equality or insertion.

1049

chars2 += len(text)

1050

if chars1 > loc: # Overshot the location.

1051

break

1052

last_chars1 = chars1

1053

last_chars2 = chars2

925

# Five types to be split:

926

# <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>

927

# <ins>A</ins>X<ins>C</ins><del>D</del>

928

# <ins>A</ins><del>B</del>X<ins>C</ins>

929

# <ins>A</del>X<ins>C</ins><del>D</del>

930

# <ins>A</ins><del>B</del>X<del>C</del>

1054

931

1055

if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:

1056

# The location was deleted.

1057

return last_chars2

1058

# Add the remaining len(character).

1059

return last_chars2 + (loc - last_chars1)

932

if lastequality and (

933

(pre_ins and pre_del and post_ins and post_del)

934

or (

935

(len(lastequality) < self.Diff_EditCost / 2)

936

and (pre_ins + pre_del + post_ins + post_del) == 3

937

)

938

):

939

# Duplicate record.

940

diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))

941

# Change second copy to insert.

942

diffs[equalities[-1] + 1] = (

943

self.DIFF_INSERT,

944

diffs[equalities[-1] + 1][1],

945

)

946

equalities.pop() # Throw away the equality we just deleted.

947

lastequality = None

948

if pre_ins and pre_del:

949

# No changes made which could affect previous entry, keep going.

950

post_ins = post_del = True

951

equalities = []

952

else:

953

if len(equalities):

954

equalities.pop() # Throw away the previous equality.

955

if len(equalities):

956

pointer = equalities[-1]

957

else:

958

pointer = -1

959

post_ins = post_del = False

960

changes = True

961

pointer += 1

1060

962

1061

def diff_prettyHtml(self, diffs):

1062

"""Convert a diff array into a pretty HTML report.

963

if changes:

964

self.diff_cleanupMerge(diffs)

1063

965

1064

Args:

1065

diffs: Array of diff tuples.

966

def diff_cleanupMerge(self, diffs):

967

"""Reorder and merge like edit sections. Merge equalities.

968

Any edit section can move as long as it doesn't cross an equality.

1066

969

1067

Returns:

1068

HTML representation.

1069

"""

1070

html = []

1071

for (op, data) in diffs:

1072

text = (data.replace("&", "&").replace("<", "<")

1073

.replace(">", ">").replace("\n", "¶ "))

1074

if op == self.DIFF_INSERT:

1075

html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)

1076

elif op == self.DIFF_DELETE:

1077

html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)

1078

elif op == self.DIFF_EQUAL:

1079

html.append("%s" % text)

1080

return "".join(html)

970

Args:

971

diffs: Array of diff tuples.

972

"""

973

diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end.

974

pointer = 0

975

count_delete = 0

976

count_insert = 0

977

text_delete = ""

978

text_insert = ""

979

while pointer < len(diffs):

980

if diffs[pointer][0] == self.DIFF_INSERT:

981

count_insert += 1

982

text_insert += diffs[pointer][1]

983

pointer += 1

984

elif diffs[pointer][0] == self.DIFF_DELETE:

985

count_delete += 1

986

text_delete += diffs[pointer][1]

987

pointer += 1

988

elif diffs[pointer][0] == self.DIFF_EQUAL:

989

# Upon reaching an equality, check for prior redundancies.

990

if count_delete + count_insert > 1:

991

if count_delete != 0 and count_insert != 0:

992

# Factor out any common prefixies.

993

commonlength = self.diff_commonPrefix(text_insert, text_delete)

994

if commonlength != 0:

995

x = pointer - count_delete - count_insert - 1

996

if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:

997

diffs[x] = (

998

diffs[x][0],

999

diffs[x][1] + text_insert[:commonlength],

1000

)

1001

else:

1002

diffs.insert(

1003

0, (self.DIFF_EQUAL, text_insert[:commonlength])

1004

)

1005

pointer += 1

1006

text_insert = text_insert[commonlength:]

1007

text_delete = text_delete[commonlength:]

1008

# Factor out any common suffixies.

1009

commonlength = self.diff_commonSuffix(text_insert, text_delete)

1010

if commonlength != 0:

1011

diffs[pointer] = (

1012

diffs[pointer][0],

1013

text_insert[-commonlength:] + diffs[pointer][1],

1014

)

1015

text_insert = text_insert[:-commonlength]

1016

text_delete = text_delete[:-commonlength]

1017

# Delete the offending records and add the merged ones.

1018

if count_delete == 0:

1019

diffs[pointer - count_insert : pointer] = [

1020

(self.DIFF_INSERT, text_insert)

1021

]

1022

elif count_insert == 0:

1023

diffs[pointer - count_delete : pointer] = [

1024

(self.DIFF_DELETE, text_delete)

1025

]

1026

else:

1027

diffs[pointer - count_delete - count_insert : pointer] = [

1028

(self.DIFF_DELETE, text_delete),

1029

(self.DIFF_INSERT, text_insert),

1030

]

1031

pointer = pointer - count_delete - count_insert + 1

1032

if count_delete != 0:

1033

pointer += 1

1034

if count_insert != 0:

1035

pointer += 1

1036

elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:

1037

# Merge this equality with the previous one.

1038

diffs[pointer - 1] = (

1039

diffs[pointer - 1][0],

1040

diffs[pointer - 1][1] + diffs[pointer][1],

1041

)

1042

del diffs[pointer]

1043

else:

1044

pointer += 1

1081

1045

1082

def diff_text1(self, diffs):

1083

"""Compute and return the source text (all equalities and deletions).

1084

1085

Args:

1086

diffs: Array of diff tuples.

1046

count_insert = 0

1047

count_delete = 0

1048

text_delete = ""

1049

text_insert = ""

1087

1050

1088

Returns:

1089

Source text.

1090

"""

1091

text = []

1092

for (op, data) in diffs:

1093

if op != self.DIFF_INSERT:

1094

text.append(data)

1095

return "".join(text)

1096

1097

def diff_text2(self, diffs):

1098

"""Compute and return the destination text (all equalities and insertions).

1051

if diffs[-1][1] == "":

1052

diffs.pop() # Remove the dummy entry at the end.

1099

1053

1100

Args:

1101

diffs: Array of diff tuples.

1054

# Second pass: look for single edits surrounded on both sides by equalities

1055

# which can be shifted sideways to eliminate an equality.

1056

# e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC

1057

changes = False

1058

pointer = 1

1059

# Intentionally ignore the first and last element (don't need checking).

1060

while pointer < len(diffs) - 1:

1061

if (

1062

diffs[pointer - 1][0] == self.DIFF_EQUAL

1063

and diffs[pointer + 1][0] == self.DIFF_EQUAL

1064

):

1065

# This is a single edit surrounded by equalities.

1066

if diffs[pointer][1].endswith(diffs[pointer - 1][1]):

1067

# Shift the edit over the previous equality.

1068

diffs[pointer] = (

1069

diffs[pointer][0],

1070

diffs[pointer - 1][1]

1071

+ diffs[pointer][1][: -len(diffs[pointer - 1][1])],

1072

)

1073

diffs[pointer + 1] = (

1074

diffs[pointer + 1][0],

1075

diffs[pointer - 1][1] + diffs[pointer + 1][1],

1076

)

1077

del diffs[pointer - 1]

1078

changes = True

1079

elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):

1080

# Shift the edit over the next equality.

1081

diffs[pointer - 1] = (

1082

diffs[pointer - 1][0],

1083

diffs[pointer - 1][1] + diffs[pointer + 1][1],

1084

)

1085

diffs[pointer] = (

1086

diffs[pointer][0],

1087

diffs[pointer][1][len(diffs[pointer + 1][1]) :]

1088

+ diffs[pointer + 1][1],

1089

)

1090

del diffs[pointer + 1]

1091

changes = True

1092

pointer += 1

1102

1093

1103

Returns:

1104

Destination text.

1105

"""

1106

text = []

1107

for (op, data) in diffs:

1108

if op != self.DIFF_DELETE:

1109

text.append(data)

1110

return "".join(text)

1094

# If shifts were made, the diff needs reordering and another shift sweep.

1095

if changes:

1096

self.diff_cleanupMerge(diffs)

1097

1098

def diff_xIndex(self, diffs, loc):

1099

"""loc is a location in text1, compute and return the equivalent location

1100

in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8

1101

1102

Args:

1103

diffs: Array of diff tuples.

1104

loc: Location within text1.

1111

1105

1112

def diff_levenshtein(self, diffs):

1113

"""Compute the Levenshtein distance; the number of inserted, deleted or

1114

substituted characters.

1106

Returns:

1107

Location within text2.

1108

"""

1109

chars1 = 0

1110

chars2 = 0

1111

last_chars1 = 0

1112

last_chars2 = 0

1113

for x in range(len(diffs)):

1114

(op, text) = diffs[x]

1115

if op != self.DIFF_INSERT: # Equality or deletion.

1116

chars1 += len(text)

1117

if op != self.DIFF_DELETE: # Equality or insertion.

1118

chars2 += len(text)

1119

if chars1 > loc: # Overshot the location.

1120

break

1121

last_chars1 = chars1

1122

last_chars2 = chars2

1115

1123

1116

Args:

1117

diffs: Array of diff tuples.

1124

if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:

1125

# The location was deleted.

1126

return last_chars2

1127

# Add the remaining len(character).

1128

return last_chars2 + (loc - last_chars1)

1129

1130

def diff_prettyHtml(self, diffs):

1131

"""Convert a diff array into a pretty HTML report.

1132

1133

Args:

1134

diffs: Array of diff tuples.

1135

1136

Returns:

1137

HTML representation.

1138

"""

1139

html = []

1140

for op, data in diffs:

1141

text = (

1142

data.replace("&", "&")

1143

.replace("<", "<")

1144

.replace(">", ">")

1145

.replace("\n", "¶ ")

1146

)

1147

if op == self.DIFF_INSERT:

1148

html.append('<ins style="background:#e6ffe6;">%s</ins>' % text)

1149

elif op == self.DIFF_DELETE:

1150

html.append('<del style="background:#ffe6e6;">%s</del>' % text)

1151

elif op == self.DIFF_EQUAL:

1152

html.append("%s" % text)

1153

return "".join(html)

1154

1155

def diff_text1(self, diffs):

1156

"""Compute and return the source text (all equalities and deletions).

1118

1157

1119

Returns:

1120

Number of changes.

1121

"""

1122

levenshtein = 0

1123

insertions = 0

1124

deletions = 0

1125

for (op, data) in diffs:

1126

if op == self.DIFF_INSERT:

1127

insertions += len(data)

1128

elif op == self.DIFF_DELETE:

1129

deletions += len(data)

1130

elif op == self.DIFF_EQUAL:

1131

# A deletion and an insertion is one substitution.

1132

levenshtein += max(insertions, deletions)

1158

Args:

1159

diffs: Array of diff tuples.

1160

1161

Returns:

1162

Source text.

1163

"""

1164

text = []

1165

for op, data in diffs:

1166

if op != self.DIFF_INSERT:

1167

text.append(data)

1168

return "".join(text)

1169

1170

def diff_text2(self, diffs):

1171

"""Compute and return the destination text (all equalities and insertions).

1172

1173

Args:

1174

diffs: Array of diff tuples.

1175

1176

Returns:

1177

Destination text.

1178

"""

1179

text = []

1180

for op, data in diffs:

1181

if op != self.DIFF_DELETE:

1182

text.append(data)

1183

return "".join(text)

1184

1185

def diff_levenshtein(self, diffs):

1186

"""Compute the Levenshtein distance; the number of inserted, deleted or

1187

substituted characters.

1188

1189

Args:

1190

diffs: Array of diff tuples.

1191

1192

Returns:

1193

Number of changes.

1194

"""

1195

levenshtein = 0

1133

1196

insertions = 0

1134

1197

deletions = 0

1135

levenshtein += max(insertions, deletions)

1136

return levenshtein

1198

for op, data in diffs:

1199

if op == self.DIFF_INSERT:

1200

insertions += len(data)

1201

elif op == self.DIFF_DELETE:

1202

deletions += len(data)

1203

elif op == self.DIFF_EQUAL:

1204

# A deletion and an insertion is one substitution.

1205

levenshtein += max(insertions, deletions)

1206

insertions = 0

1207

deletions = 0

1208

levenshtein += max(insertions, deletions)

1209

return levenshtein

1137

1210

1138

def diff_toDelta(self, diffs):

1139

"""Crush the diff into an encoded string which describes the operations

1140

required to transform text1 into text2.

1141

E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.

1142

Operations are tab-separated. Inserted text is escaped using %xx notation.

1211

def diff_toDelta(self, diffs):

1212

"""Crush the diff into an encoded string which describes the operations

1213

required to transform text1 into text2.

1214

E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.

1215

Operations are tab-separated. Inserted text is escaped using %xx notation.

1143

1216

1144

Args:

1145

diffs: Array of diff tuples.

1217

Args:

1218

diffs: Array of diff tuples.

1146

1219

1147

Returns:

1148

Delta text.

1149

"""

1150

text = []

1151

for (op, data) in diffs:

1152

if op == self.DIFF_INSERT:

1153

# High ascii will raise UnicodeDecodeError. Use Unicode instead.

1154

data = data.encode("utf-8")

1155

text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))

1156

elif op == self.DIFF_DELETE:

1157

text.append("-%d" % len(data))

1158

elif op == self.DIFF_EQUAL:

1159

text.append("=%d" % len(data))

1160

return "\t".join(text)

1220

Returns:

1221

Delta text.

1222

"""

1223

text = []

1224

for op, data in diffs:

1225

if op == self.DIFF_INSERT:

1226

# High ascii will raise UnicodeDecodeError. Use Unicode instead.

1227

data = data.encode("utf-8")

1228

text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))

1229

elif op == self.DIFF_DELETE:

1230

text.append("-%d" % len(data))

1231

elif op == self.DIFF_EQUAL:

1232

text.append("=%d" % len(data))

1233

return "\t".join(text)

1161

1234

1162

def diff_fromDelta(self, text1, delta):

1163

"""Given the original text1, and an encoded string which describes the

1164

operations required to transform text1 into text2, compute the full diff.

1235

def diff_fromDelta(self, text1, delta):

1236

"""Given the original text1, and an encoded string which describes the

1237

operations required to transform text1 into text2, compute the full diff.

1165

1238

1166

Args:

1167

text1: Source string for the diff.

1168

delta: Delta text.

1169

1170

Returns:

1171

Array of diff tuples.

1239

Args:

1240

text1: Source string for the diff.

1241

delta: Delta text.

1172

1242

1173

Raises:

1174

ValueError: If invalid input.

1175

"""

1176

if type(delta) == unicode:

1177

# Deltas should be composed of a subset of ascii chars, Unicode not

1178

# required. If this encode raises UnicodeEncodeError, delta is invalid.

1179

delta = delta.encode("ascii")

1180

diffs = []

1181

pointer = 0 # Cursor in text1

1182

tokens = delta.split("\t")

1183

for token in tokens:

1184

if token == "":

1185

# Blank tokens are ok (from a trailing \t).

1186

continue

1187

# Each token begins with a one character parameter which specifies the

1188

# operation of this token (delete, insert, equality).

1189

param = token[1:]

1190

if token[0] == "+":

1191

param = urllib.parse.unquote(param).decode("utf-8")

1192

diffs.append((self.DIFF_INSERT, param))

1193

elif token[0] == "-" or token[0] == "=":

1194

try:

1195

n = int(param)

1196

except ValueError:

1197

raise ValueError("Invalid number in diff_fromDelta: " + param)

1198

if n < 0:

1199

raise ValueError("Negative number in diff_fromDelta: " + param)

1200

text = text1[pointer : pointer + n]

1201

pointer += n

1202

if token[0] == "=":

1203

diffs.append((self.DIFF_EQUAL, text))

1204

else:

1205

diffs.append((self.DIFF_DELETE, text))

1206

else:

1207

# Anything else is an error.

1208

raise ValueError("Invalid diff operation in diff_fromDelta: " +

1209

token[0])

1210

if pointer != len(text1):

1211

raise ValueError(

1212

"Delta length (%d) does not equal source text length (%d)." %

1213

(pointer, len(text1)))

1214

return diffs

1243

Returns:

1244

Array of diff tuples.

1215

1245

1216

# MATCH FUNCTIONS

1246

Raises:

1247

ValueError: If invalid input.

1248

"""

1249

if type(delta) == str:

1250

# Deltas should be composed of a subset of ascii chars, Unicode not

1251

# required. If this encode raises UnicodeEncodeError, delta is invalid.

1252

delta = delta.encode("ascii")

1253

diffs = []

1254

pointer = 0 # Cursor in text1

1255

tokens = delta.split("\t")

1256

for token in tokens:

1257

if token == "":

1258

# Blank tokens are ok (from a trailing \t).

1259

continue

1260

# Each token begins with a one character parameter which specifies the

1261

# operation of this token (delete, insert, equality).

1262

param = token[1:]

1263

if token[0] == "+":

1264

param = urllib.parse.unquote(param)

1265

diffs.append((self.DIFF_INSERT, param))

1266

elif token[0] == "-" or token[0] == "=":

1267

try:

1268

n = int(param)

1269

except ValueError:

1270

raise ValueError("Invalid number in diff_fromDelta: " + param)

1271

if n < 0:

1272

raise ValueError("Negative number in diff_fromDelta: " + param)

1273

text = text1[pointer : pointer + n]

1274

pointer += n

1275

if token[0] == "=":

1276

diffs.append((self.DIFF_EQUAL, text))

1277

else:

1278

diffs.append((self.DIFF_DELETE, text))

1279

else:

1280

# Anything else is an error.

1281

raise ValueError(

1282

"Invalid diff operation in diff_fromDelta: " + token[0]

1283

)

1284

if pointer != len(text1):

1285

raise ValueError(

1286

"Delta length (%d) does not equal source text length (%d)."

1287

% (pointer, len(text1))

1288

)

1289

return diffs

1217

1290

1218

def match_main(self, text, pattern, loc):

1219

"""Locate the best instance of 'pattern' in 'text' near 'loc'.

1291

# MATCH FUNCTIONS

1220

1292

1221

Args:

1222

text: The text to search.

1223

pattern: The pattern to search for.

1224

loc: The location to search around.

1293

def match_main(self, text, pattern, loc):

1294

"""Locate the best instance of 'pattern' in 'text' near 'loc'.

1225

1295

1226

Returns:

1227

Best match index or -1.

1228

"""

1229

# Check for null inputs.

1230

if text is None or pattern is None:

1231

raise ValueError("Null inputs. (match_main)")

1296

Args:

1297

text: The text to search.

1298

pattern: The pattern to search for.

1299

loc: The location to search around.

1300

1301

Returns:

1302

Best match index or -1.

1303

"""

1304

# Check for null inputs.

1305

if text is None or pattern is None:

1306

raise ValueError("Null inputs. (match_main)")

1232

1307

1233

loc = max(0, min(loc, len(text)))

1234

if text == pattern:

1235

# Shortcut (potentially not guaranteed by the algorithm)

1236

return 0

1237

elif not text:

1238

# Nothing to match.

1239

return -1

1240

elif text[loc:loc + len(pattern)] == pattern:

1241

# Perfect match at the perfect spot! (Includes case of null pattern)

1242

return loc

1243

else:

1244

# Do a fuzzy compare.

1245

match = self.match_bitap(text, pattern, loc)

1246

return match

1308

loc = max(0, min(loc, len(text)))

1309

if text == pattern:

1310

# Shortcut (potentially not guaranteed by the algorithm)

1311

return 0

1312

elif not text:

1313

# Nothing to match.

1314

return -1

1315

elif text[loc : loc + len(pattern)] == pattern:

1316

# Perfect match at the perfect spot! (Includes case of null pattern)

1317

return loc

1318

else:

1319

# Do a fuzzy compare.

1320

match = self.match_bitap(text, pattern, loc)

1321

return match

1247

1322

1248

def match_bitap(self, text, pattern, loc):

1249

"""Locate the best instance of 'pattern' in 'text' near 'loc' using the

1250

Bitap algorithm.

1323

def match_bitap(self, text, pattern, loc):

1324

"""Locate the best instance of 'pattern' in 'text' near 'loc' using the

1325

Bitap algorithm.

1251

1326

1252

Args:

1253

text: The text to search.

1254

pattern: The pattern to search for.

1255

loc: The location to search around.

1327

Args:

1328

text: The text to search.

1329

pattern: The pattern to search for.

1330

loc: The location to search around.

1256

1331

1257

Returns:

1258

Best match index or -1.

1259

"""

1260

# Python doesn't have a maxint limit, so ignore this check.

1261

#if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:

1262

# raise ValueError("Pattern too long for this application.")

1332

Returns:

1333

Best match index or -1.

1334

"""

1335

# Python doesn't have a maxint limit, so ignore this check.

1336

# if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:

1337

# raise ValueError("Pattern too long for this application.")

1263

1338

1264

# Initialise the alphabet.

1265

s = self.match_alphabet(pattern)

1339

# Initialise the alphabet.

1340

s = self.match_alphabet(pattern)

1266

1341

1267

def match_bitapScore(e, x):

1268

"""Compute and return the score for a match with e errors and x location.

1269

Accesses loc and pattern through being a closure.

1342

def match_bitapScore(e, x):

1343

"""Compute and return the score for a match with e errors and x location.

1344

Accesses loc and pattern through being a closure.

1270

1345

1271

Args:

1272

e: Number of errors in match.

1273

x: Location of match.

1346

Args:

1347

e: Number of errors in match.

1348

x: Location of match.

1274

1349

1275

Returns:

1276

Overall score for match (0.0 = good, 1.0 = bad).

1277

"""

1278

accuracy = float(e) / len(pattern)

1279

proximity = abs(loc - x)

1280

if not self.Match_Distance:

1281

# Dodge divide by zero error.

1282

return proximity and 1.0 or accuracy

1283

return accuracy + (proximity / float(self.Match_Distance))

1350

Returns:

1351

Overall score for match (0.0 = good, 1.0 = bad).

1352

"""

1353

accuracy = float(e) / len(pattern)

1354

proximity = abs(loc - x)

1355

if not self.Match_Distance:

1356

# Dodge divide by zero error.

1357

return proximity and 1.0 or accuracy

1358

return accuracy + (proximity / float(self.Match_Distance))

1284

1359

1285

# Highest score beyond which we give up.

1286

score_threshold = self.Match_Threshold

1287

# Is there a nearby exact match? (speedup)

1288

best_loc = text.find(pattern, loc)

1289

if best_loc != -1:

1290

score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

1291

# What about in the other direction? (speedup)

1292

best_loc = text.rfind(pattern, loc + len(pattern))

1293

if best_loc != -1:

1294

score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

1360

# Highest score beyond which we give up.

1361

score_threshold = self.Match_Threshold

1362

# Is there a nearby exact match? (speedup)

1363

best_loc = text.find(pattern, loc)

1364

if best_loc != -1:

1365

score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

1366

# What about in the other direction? (speedup)

1367

best_loc = text.rfind(pattern, loc + len(pattern))

1368

if best_loc != -1:

1369

score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

1295

1370

1296

# Initialise the bit arrays.

1297

matchmask = 1 << (len(pattern) - 1)

1298

best_loc = -1

1371

# Initialise the bit arrays.

1372

matchmask = 1 << (len(pattern) - 1)

1373

best_loc = -1

1299

1374

1300

bin_max = len(pattern) + len(text)

1301

# Empty initialization added to appease pychecker.

1302

last_rd = None

1303

for d in range(len(pattern)):

1304

# Scan for the best match each iteration allows for one more error.

1305

# Run a binary search to determine how far from 'loc' we can stray at

1306

# this error level.

1307

bin_min = 0

1308

bin_mid = bin_max

1309

while bin_min < bin_mid:

1310

if match_bitapScore(d, loc + bin_mid) <= score_threshold:

1311

bin_min = bin_mid

1312

else:

1313

bin_max = bin_mid

1314

bin_mid = (bin_max - bin_min) // 2 + bin_min

1375

bin_max = len(pattern) + len(text)

1376

# Empty initialization added to appease pychecker.

1377

last_rd = None

1378

for d in range(len(pattern)):

1379

# Scan for the best match each iteration allows for one more error.

1380

# Run a binary search to determine how far from 'loc' we can stray at

1381

# this error level.

1382

bin_min = 0

1383

bin_mid = bin_max

1384

while bin_min < bin_mid:

1385

if match_bitapScore(d, loc + bin_mid) <= score_threshold:

1386

bin_min = bin_mid

1387

else:

1388

bin_max = bin_mid

1389

bin_mid = (bin_max - bin_min) // 2 + bin_min

1315

1390

1316

# Use the result from this iteration as the maximum for the next.

1317

bin_max = bin_mid

1318

start = max(1, loc - bin_mid + 1)

1319

finish = min(loc + bin_mid, len(text)) + len(pattern)

1391

# Use the result from this iteration as the maximum for the next.

1392

bin_max = bin_mid

1393

start = max(1, loc - bin_mid + 1)

1394

finish = min(loc + bin_mid, len(text)) + len(pattern)

1320

1395

1321

rd = [0] * (finish + 2)

1322

rd[finish + 1] = (1 << d) - 1

1323

for j in range(finish, start - 1, -1):

1324

if len(text) <= j - 1:

1325

# Out of range.

1326

charMatch = 0

1327

else:

1328

charMatch = s.get(text[j - 1], 0)

1329

if d == 0: # First pass: exact match.

1330

rd[j] = ((rd[j + 1] << 1) | 1) & charMatch

1331

else: # Subsequent passes: fuzzy match.

1332

rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (

1333

((~~last_~~rd[j + 1] | ~~last_rd~~[j]) << 1) | 1) | ~~last_rd~~[j + 1]

1334

if rd[j] & matchmask:

1335

score = match_bitapScore(d, j - 1)

1336

# This match will almost certainly be better than any existing match.

1337

# But check anyway.

1338

if score <= score_threshold:

1339

# Told you so.

1340

score_threshold = score

1341

best_loc = j - 1

1342

if best_loc > loc:

1343

# When passing loc, don't exceed our current distance from loc.

1344

~~start~~ = ~~max~~(1, 2 * ~~loc~~ - ~~best_loc~~)

1345

else:

1346

# Already passed loc, downhill from here on in.

1347

break

1348

# No hope for a (better) match at greater error levels.

1349

if match_bitapScore(d + 1, loc) > score_threshold:

1350

break

1351

last_rd = rd

1352

return best_loc

1396

rd = [0] * (finish + 2)

1397

rd[finish + 1] = (1 << d) - 1

1398

for j in range(finish, start - 1, -1):

1399

if len(text) <= j - 1:

1400

# Out of range.

1401

charMatch = 0

1402

else:

1403

charMatch = s.get(text[j - 1], 0)

1404

if d == 0: # First pass: exact match.

1405

rd[j] = ((rd[j + 1] << 1) | 1) & charMatch

1406

else: # Subsequent passes: fuzzy match.

1407

rd[j] = (

1408

(((rd[j + 1] << 1) | 1) & charMatch)

1409

| (((last_rd[j + 1] | last_rd[j]) << 1) | 1)

1410

| last_rd[j + 1]

1411

)

1412

if rd[j] & matchmask:

1413

score = match_bitapScore(d, j - 1)

1414

# This match will almost certainly be better than any existing match.

1415

# But check anyway.

1416

if score <= score_threshold:

1417

# Told you so.

1418

score_threshold = score

1419

best_loc = j - 1

1420

if best_loc > loc:

1421

# When passing loc, don't exceed our current distance from loc.

1422

start = max(1, 2 * loc - best_loc)

1423

else:

1424

# Already passed loc, downhill from here on in.

1425

break

1426

# No hope for a (better) match at greater error levels.

1427

if match_bitapScore(d + 1, loc) > score_threshold:

1428

break

1429

last_rd = rd

1430

return best_loc

1353

1431

1354

def match_alphabet(self, pattern):

1355

"""Initialise the alphabet for the Bitap algorithm.

1432

def match_alphabet(self, pattern):

1433

"""Initialise the alphabet for the Bitap algorithm.

1356

1434

1357

Args:

1358

pattern: The text to encode.

1435

Args:

1436

pattern: The text to encode.

1359

1437

1360

Returns:

1361

Hash of character locations.

1362

"""

1363

s = {}

1364

for char in pattern:

1365

s[char] = 0

1366

for i in range(len(pattern)):

1367

s[pattern[i]] |= 1 << (len(pattern) - i - 1)

1368

return s

1438

Returns:

1439

Hash of character locations.

1440

"""

1441

s = {}

1442

for char in pattern:

1443

s[char] = 0

1444

for i in range(len(pattern)):

1445

s[pattern[i]] |= 1 << (len(pattern) - i - 1)

1446

return s

1369

1447

1370

# PATCH FUNCTIONS

1448

# PATCH FUNCTIONS

1371

1449

1372

def patch_addContext(self, patch, text):

1373

"""Increase the context until it is unique,

1374

but don't let the pattern expand beyond Match_MaxBits.

1450

def patch_addContext(self, patch, text):

1451

"""Increase the context until it is unique,

1452

but don't let the pattern expand beyond Match_MaxBits.

1375

1453

1376

Args:

1377

patch: The patch to grow.

1378

text: Source text.

1379

"""

1380

if len(text) == 0:

1381

return

1382

pattern = text[patch.start2 : patch.start2 + patch.length1]

1383

padding = 0

1454

Args:

1455

patch: The patch to grow.

1456

text: Source text.

1457

"""

1458

if len(text) == 0:

1459

return

1460

pattern = text[patch.start2 : patch.start2 + patch.length1]

1461

padding = 0

1384

1462

1385

# Look for the first and last matches of pattern in text. If two different

1386

# matches are found, increase the pattern length.

1387

while (text.find(pattern) != text.rfind(pattern) and (~~self~~.~~Match_MaxBits~~ ==

1388

0 or ~~len~~(~~pattern~~) < self.Match_MaxBits - ~~self~~.~~Patch_Margin~~ -

1389

self.Patch_Margin)):

1390

padding += self.Patch_Margin

1391

pattern = text[max(0, patch.start2 - padding) :

1392

patch.start2 + patch.length1 + padding]

1393

# Add one chunk for good luck.

1394

padding += self.Patch_Margin

1463

# Look for the first and last matches of pattern in text. If two different

1464

# matches are found, increase the pattern length.

1465

while text.find(pattern) != text.rfind(pattern) and (

1466

self.Match_MaxBits == 0

1467

or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin

1468

):

1469

padding += self.Patch_Margin

1470

pattern = text[

1471

max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding

1472

]

1473

# Add one chunk for good luck.

1474

padding += self.Patch_Margin

1395

1475

1396

# Add the prefix.

1397

prefix = text[max(0, patch.start2 - padding) : patch.start2]

1398

if prefix:

1399

patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]

1400

# Add the suffix.

1401

suffix = text[patch.start2 + patch.length1 :

1402

patch.start2 + patch.length1 + padding]

1403

if suffix:

1404

patch.diffs.append((self.DIFF_EQUAL, suffix))

1476

# Add the prefix.

1477

prefix = text[max(0, patch.start2 - padding) : patch.start2]

1478

if prefix:

1479

patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]

1480

# Add the suffix.

1481

suffix = text[

1482

patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding

1483

]

1484

if suffix:

1485

patch.diffs.append((self.DIFF_EQUAL, suffix))

1405

1486

1406

# Roll back the start points.

1407

patch.start1 -= len(prefix)

1408

patch.start2 -= len(prefix)

1409

# Extend lengths.

1410

patch.length1 += len(prefix) + len(suffix)

1411

patch.length2 += len(prefix) + len(suffix)

1487

# Roll back the start points.

1488

patch.start1 -= len(prefix)

1489

patch.start2 -= len(prefix)

1490

# Extend lengths.

1491

patch.length1 += len(prefix) + len(suffix)

1492

patch.length2 += len(prefix) + len(suffix)

1412

1493

1413

def patch_make(self, a, b=None, c=None):

1414

"""Compute a list of patches to turn text1 into text2.

1415

Use diffs if provided, otherwise compute it ourselves.

1416

There are four ways to call this function, depending on what data is

1417

available to the caller:

1418

Method 1:

1419

a = text1, b = text2

1420

Method 2:

1421

a = diffs

1422

Method 3 (optimal):

1423

a = text1, b = diffs

1424

Method 4 (deprecated, use method 3):

1425

a = text1, b = text2, c = diffs

1494

def patch_make(self, a, b=None, c=None):

1495

"""Compute a list of patches to turn text1 into text2.

1496

Use diffs if provided, otherwise compute it ourselves.

1497

There are four ways to call this function, depending on what data is

1498

available to the caller:

1499

Method 1:

1500

a = text1, b = text2

1501

Method 2:

1502

a = diffs

1503

Method 3 (optimal):

1504

a = text1, b = diffs

1505

Method 4 (deprecated, use method 3):

1506

a = text1, b = text2, c = diffs

1426

1507

1427

Args:

1428

a: text1 (methods 1,3,4) or Array of diff tuples for text1 to

1429

text2 (method 2).

1430

b: text2 (methods 1,4) or Array of diff tuples for text1 to

1431

text2 (method 3) or undefined (method 2).

1432

c: Array of diff tuples for text1 to text2 (method 4) or

1433

undefined (methods 1,2,3).

1508

Args:

1509

a: text1 (methods 1,3,4) or Array of diff tuples for text1 to

1510

text2 (method 2).

1511

b: text2 (methods 1,4) or Array of diff tuples for text1 to

1512

text2 (method 3) or undefined (method 2).

1513

c: Array of diff tuples for text1 to text2 (method 4) or

1514

undefined (methods 1,2,3).

1434

1515

1435

Returns:

1436

Array of Patch objects.

1437

"""

1438

text1 = None

1439

diffs = None

1440

# Note that texts may arrive as 'str' or 'unicode'.

1441

if isinstance(a, str) and isinstance(b, str) and c is None:

1442

# Method 1: text1, text2

1443

# Compute diffs from text1 and text2.

1444

text1 = a

1445

diffs = self.diff_main(text1, b, True)

1446

if len(diffs) > 2:

1447

self.diff_cleanupSemantic(diffs)

1448

self.diff_cleanupEfficiency(diffs)

1449

elif isinstance(a, list) and b is None and c is None:

1450

# Method 2: diffs

1451

# Compute text1 from diffs.

1452

diffs = a

1453

text1 = self.diff_text1(diffs)

1454

elif isinstance(a, str) and isinstance(b, list) and c is None:

1455

# Method 3: text1, diffs

1456

text1 = a

1457

diffs = b

1458

elif (isinstance(a, str) and isinstance(b, str) and

1459

isinstance(c, list)):

1460

# Method 4: text1, text2, diffs

1461

# text2 is not used.

1462

text1 = a

1463

diffs = c

1464

else:

1465

raise ValueError("Unknown call format to patch_make.")

1516

Returns:

1517

Array of Patch objects.

1518

"""

1519

text1 = None

1520

diffs = None

1521

# Note that texts may arrive as 'str' or 'unicode'.

1522

if isinstance(a, str) and isinstance(b, str) and c is None:

1523

# Method 1: text1, text2

1524

# Compute diffs from text1 and text2.

1525

text1 = a

1526

diffs = self.diff_main(text1, b, True)

1527

if len(diffs) > 2:

1528

self.diff_cleanupSemantic(diffs)

1529

self.diff_cleanupEfficiency(diffs)

1530

elif isinstance(a, list) and b is None and c is None:

1531

# Method 2: diffs

1532

# Compute text1 from diffs.

1533

diffs = a

1534

text1 = self.diff_text1(diffs)

1535

elif isinstance(a, str) and isinstance(b, list) and c is None:

1536

# Method 3: text1, diffs

1537

text1 = a

1538

diffs = b

1539

elif isinstance(a, str) and isinstance(b, str) and isinstance(c, list):

1540

# Method 4: text1, text2, diffs

1541

# text2 is not used.

1542

text1 = a

1543

diffs = c

1544

else:

1545

raise ValueError("Unknown call format to patch_make.")

1466

1546

1467

if not diffs:

1468

return [] # Get rid of the None case.

1469

patches = []

1470

patch = patch_obj()

1471

char_count1 = 0 # Number of characters into the text1 string.

1472

char_count2 = 0 # Number of characters into the text2 string.

1473

prepatch_text = text1 # Recreate the patches to determine context info.

1474

postpatch_text = text1

1475

for x in range(len(diffs)):

1476

(diff_type, diff_text) = diffs[x]

1477

if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:

1478

# A new patch starts here.

1479

patch.start1 = char_count1

1480

patch.start2 = char_count2

1481

if diff_type == self.DIFF_INSERT:

1482

# Insertion

1483

patch.diffs.append(diffs[x])

1484

patch.length2 += len(diff_text)

1485

postpatch_text = (postpatch_text[:char_count2] + diff_text +

1486

postpatch_text[char_count2:])

1487

elif diff_type == self.DIFF_DELETE:

1488

# Deletion.

1489

patch.length1 += len(diff_text)

1490

patch.diffs.append(diffs[x])

1491

postpatch_text = (postpatch_text[:char_count2] +

1492

~~postpatch_text~~[~~char_count2~~ + len(diff_text)~~:])~~

1493

elif (diff_type == self.DIFF_EQUAL and

1494

len(diff_text) <= 2 * self.Patch_Margin and

1495

len(patch.diffs) != 0 and len(diffs) != x + 1):

1496

# Small equality inside a patch.

1497

patch.diffs.append(diffs[x])

1498

patch.length1 += len(diff_text)

1499

patch.length2 += len(diff_text)

1547

if not diffs:

1548

return [] # Get rid of the None case.

1549

patches = []

1550

patch = patch_obj()

1551

char_count1 = 0 # Number of characters into the text1 string.

1552

char_count2 = 0 # Number of characters into the text2 string.

1553

prepatch_text = text1 # Recreate the patches to determine context info.

1554

postpatch_text = text1

1555

for x in range(len(diffs)):

1556

(diff_type, diff_text) = diffs[x]

1557

if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:

1558

# A new patch starts here.

1559

patch.start1 = char_count1

1560

patch.start2 = char_count2

1561

if diff_type == self.DIFF_INSERT:

1562

# Insertion

1563

patch.diffs.append(diffs[x])

1564

patch.length2 += len(diff_text)

1565

postpatch_text = (

1566

postpatch_text[:char_count2]

1567

+ diff_text

1568

+ postpatch_text[char_count2:]

1569

)

1570

elif diff_type == self.DIFF_DELETE:

1571

# Deletion.

1572

patch.length1 += len(diff_text)

1573

patch.diffs.append(diffs[x])

1574

postpatch_text = (

1575

postpatch_text[:char_count2]

1576

+ postpatch_text[char_count2 + len(diff_text) :]

1577

)

1578

elif (

1579

diff_type == self.DIFF_EQUAL

1580

and len(diff_text) <= 2 * self.Patch_Margin

1581

and len(patch.diffs) != 0

1582

and len(diffs) != x + 1

1583

):

1584

# Small equality inside a patch.

1585

patch.diffs.append(diffs[x])

1586

patch.length1 += len(diff_text)

1587

patch.length2 += len(diff_text)

1500

1588

1501

if (diff_type == self.DIFF_EQUAL and

1502

len(diff_text) >= 2 * self.Patch_Margin):

1503

# Time for a new patch.

1504

if len(patch.diffs) != 0:

1505

self.patch_addContext(patch, prepatch_text)

1506

patches.~~append~~(~~patch~~)

1507

patch = patch_obj()

1508

# Unlike Unidiff, our patch lists have a rolling context.

1509

# http://code.google.com/p/google-diff-match-patch/wiki/Unidiff

1510

# Update prepatch text & pos to reflect the application of the

1511

# just completed patch.

1512

prepatch_text = postpatch_text

1513

char_count1 = char_count2

1589

if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin:

1590

# Time for a new patch.

1591

if len(patch.diffs) != 0:

1592

self.patch_addContext(patch, prepatch_text)

1593

patches.append(patch)

1594

patch = patch_obj()

1595

# Unlike Unidiff, our patch lists have a rolling context.

1596

# http://code.google.com/p/google-diff-match-patch/wiki/Unidiff

1597

# Update prepatch text & pos to reflect the application of the

1598

# just completed patch.

1599

prepatch_text = postpatch_text

1600

char_count1 = char_count2

1514

1601

1515

# Update the current character count.

1516

if diff_type != self.DIFF_INSERT:

1517

char_count1 += len(diff_text)

1518

if diff_type != self.DIFF_DELETE:

1519

char_count2 += len(diff_text)

1602

# Update the current character count.

1603

if diff_type != self.DIFF_INSERT:

1604

char_count1 += len(diff_text)

1605

if diff_type != self.DIFF_DELETE:

1606

char_count2 += len(diff_text)

1520

1607

1521

# Pick up the leftover patch if not empty.

1522

if len(patch.diffs) != 0:

1523

self.patch_addContext(patch, prepatch_text)

1524

patches.append(patch)

1525

return patches

1608

# Pick up the leftover patch if not empty.

1609

if len(patch.diffs) != 0:

1610

self.patch_addContext(patch, prepatch_text)

1611

patches.append(patch)

1612

return patches

1526

1613

1527

def patch_deepCopy(self, patches):

1528

"""Given an array of patches, return another array that is identical.

1614

def patch_deepCopy(self, patches):

1615

"""Given an array of patches, return another array that is identical.

1529

1616

1530

Args:

1531

patches: Array of Patch objects.

1617

Args:

1618

patches: Array of Patch objects.

1532

1619

1533

Returns:

1534

Array of Patch objects.

1535

"""

1536

patchesCopy = []

1537

for patch in patches:

1538

patchCopy = patch_obj()

1539

# No need to deep copy the tuples since they are immutable.

1540

patchCopy.diffs = patch.diffs[:]

1541

patchCopy.start1 = patch.start1

1542

patchCopy.start2 = patch.start2

1543

patchCopy.length1 = patch.length1

1544

patchCopy.length2 = patch.length2

1545

patchesCopy.append(patchCopy)

1546

return patchesCopy

1620

Returns:

1621

Array of Patch objects.

1622

"""

1623

patchesCopy = []

1624

for patch in patches:

1625

patchCopy = patch_obj()

1626

# No need to deep copy the tuples since they are immutable.

1627

patchCopy.diffs = patch.diffs[:]

1628

patchCopy.start1 = patch.start1

1629

patchCopy.start2 = patch.start2

1630

patchCopy.length1 = patch.length1

1631

patchCopy.length2 = patch.length2

1632

patchesCopy.append(patchCopy)

1633

return patchesCopy

1547

1634

1548

def patch_apply(self, patches, text):

1549

"""Merge a set of patches onto the text. Return a patched text, as well

1550

as a list of true/false values indicating which patches were applied.

1635

def patch_apply(self, patches, text):

1636

"""Merge a set of patches onto the text. Return a patched text, as well

1637

as a list of true/false values indicating which patches were applied.

1551

1638

1552

Args:

1553

patches: Array of Patch objects.

1554

text: Old text.

1639

Args:

1640

patches: Array of Patch objects.

1641

text: Old text.

1555

1642

1556

Returns:

1557

Two element Array, containing the new text and an array of boolean values.

1558

"""

1559

if not patches:

1560

return (text, [])

1643

Returns:

1644

Two element Array, containing the new text and an array of boolean values.

1645

"""

1646

if not patches:

1647

return (text, [])

1561

1648

1562

# Deep copy the patches so that no changes are made to originals.

1563

patches = self.patch_deepCopy(patches)

1649

# Deep copy the patches so that no changes are made to originals.

1650

patches = self.patch_deepCopy(patches)

1564

1651

1565

nullPadding = self.patch_addPadding(patches)

1566

text = nullPadding + text + nullPadding

1567

self.patch_splitMax(patches)

1652

nullPadding = self.patch_addPadding(patches)

1653

text = nullPadding + text + nullPadding

1654

self.patch_splitMax(patches)

1568

1655

1569

# delta keeps track of the offset between the expected and actual location

1570

# of the previous patch. If there are patches expected at positions 10 and

1571

# 20, but the first patch was found at 12, delta is 2 and the second patch

1572

# has an effective expected position of 22.

1573

delta = 0

1574

results = []

1575

for patch in patches:

1576

expected_loc = patch.start2 + delta

1577

text1 = self.diff_text1(patch.diffs)

1578

end_loc = -1

1579

if len(text1) > self.Match_MaxBits:

1580

# patch_splitMax will only provide an oversized pattern in the case of

1581

# a monster delete.

1582

start_loc = self.match_main(~~text~~, ~~text1~~[:~~self~~.~~Match_MaxBits~~],

1583

expected_loc)

1584

if start_loc != -1:

1585

end_loc = self.match_main(text, text1[-self.Match_MaxBits:],

1586

~~expecte~~d_loc + ~~len~~(~~text1~~) - self.~~Match_MaxBits~~)

1587

if end_loc == -1 or start_loc >= end_loc:

1588

# Can't find valid trailing context. Drop this patch.

1589

start_loc = -1

1590

else:

1591

start_loc = self.match_main(text, text1, expected_loc)

1592

if start_loc == -1:

1593

# No match found. :(

1594

results.append(False)

1595

# Subtract the delta for this failed patch from subsequent patches.

1596

delta -= patch.length2 - patch.length1

1597

else:

1598

# Found a match. :)

1599

results.append(True)

1600

delta = start_loc - expected_loc

1601

if end_loc == -1:

1602

text2 = text[start_loc : start_loc + len(text1)]

1603

else:

1604

text2 = text[start_loc : end_loc + self.Match_MaxBits]

1605

if text1 == text2:

1606

# Perfect match, just shove the replacement text in.

1607

text = (text[:start_loc] + self.diff_text2(patch.diffs) +

1608

text[start_loc + ~~len~~(~~text1~~):])

1609

else:

1610

# Imperfect match.

1611

# Run a diff to get a framework of equivalent indices.

1612

diffs = self.diff_main(text1, text2, False)

1613

if (len(text1) > self.Match_MaxBits and

1614

self.diff_levenshtein(diffs) / float(len(text1)) >

1615

self.Patch_DeleteThreshold):

1616

# The end points match, but the content is unacceptably bad.

1617

results[-1] = False

1618

else:

1619

self.diff_cleanupSemanticLossless(diffs)

1620

~~index1~~ = 0

1621

for (op, data) in patch.diffs:

1622

if op != self.DIFF_EQUAL:

1623

index2 = self.diff_xIndex(diffs, index1)

1624

if op == self.DIFF_INSERT: # Insertion

1625

text = text[:start_loc + index2] + data + text[start_loc +

1626

index2:]

1627

elif op == self.DIFF_DELETE: # Deletion

1628

text = text[:start_loc + index2] + text[start_loc +

1629

self.diff_xIndex(diffs, index1 + len(data)):]

1630

if op != self.DIFF_DELETE:

1631

index1 += len(data)

1632

# Strip the padding off.

1633

text = text[len(nullPadding):-len(nullPadding)]

1634

return (text, results)

1656

# delta keeps track of the offset between the expected and actual location

1657

# of the previous patch. If there are patches expected at positions 10 and

1658

# 20, but the first patch was found at 12, delta is 2 and the second patch

1659

# has an effective expected position of 22.

1660

delta = 0

1661

results = []

1662

for patch in patches:

1663

expected_loc = patch.start2 + delta

1664

text1 = self.diff_text1(patch.diffs)

1665

end_loc = -1

1666

if len(text1) > self.Match_MaxBits:

1667

# patch_splitMax will only provide an oversized pattern in the case of

1668

# a monster delete.

1669

start_loc = self.match_main(

1670

text, text1[: self.Match_MaxBits], expected_loc

1671

)

1672

if start_loc != -1:

1673

end_loc = self.match_main(

1674

text,

1675

text1[-self.Match_MaxBits :],

1676

expected_loc + len(text1) - self.Match_MaxBits,

1677

)

1678

if end_loc == -1 or start_loc >= end_loc:

1679

# Can't find valid trailing context. Drop this patch.

1680

start_loc = -1

1681

else:

1682

start_loc = self.match_main(text, text1, expected_loc)

1683

if start_loc == -1:

1684

# No match found. :(

1685

results.append(False)

1686

# Subtract the delta for this failed patch from subsequent patches.

1687

delta -= patch.length2 - patch.length1

1688

else:

1689

# Found a match. :)

1690

results.append(True)

1691

delta = start_loc - expected_loc

1692

if end_loc == -1:

1693

text2 = text[start_loc : start_loc + len(text1)]

1694

else:

1695

text2 = text[start_loc : end_loc + self.Match_MaxBits]

1696

if text1 == text2:

1697

# Perfect match, just shove the replacement text in.

1698

text = (

1699

text[:start_loc]

1700

+ self.diff_text2(patch.diffs)

1701

+ text[start_loc + len(text1) :]

1702

)

1703

else:

1704

# Imperfect match.

1705

# Run a diff to get a framework of equivalent indices.

1706

diffs = self.diff_main(text1, text2, False)

1707

if (

1708

len(text1) > self.Match_MaxBits

1709

and self.diff_levenshtein(diffs) / float(len(text1))

1710

> self.Patch_DeleteThreshold

1711

):

1712

# The end points match, but the content is unacceptably bad.

1713

results[-1] = False

1714

else:

1715

self.diff_cleanupSemanticLossless(diffs)

1716

index1 = 0

1717

for op, data in patch.diffs:

1718

if op != self.DIFF_EQUAL:

1719

index2 = self.diff_xIndex(diffs, index1)

1720

if op == self.DIFF_INSERT: # Insertion

1721

text = (

1722

text[: start_loc + index2]

1723

+ data

1724

+ text[start_loc + index2 :]

1725

)

1726

elif op == self.DIFF_DELETE: # Deletion

1727

text = (

1728

text[: start_loc + index2]

1729

+ text[

1730

start_loc

1731

+ self.diff_xIndex(diffs, index1 + len(data)) :

1732

]

1733

)

1734

if op != self.DIFF_DELETE:

1735

index1 += len(data)

1736

# Strip the padding off.

1737

text = text[len(nullPadding) : -len(nullPadding)]

1738

return (text, results)

1635

1739

1636

def patch_addPadding(self, patches):

1637

"""Add some padding on text start and end so that edges can match

1638

something. Intended to be called only from within patch_apply.

1740

def patch_addPadding(self, patches):

1741

"""Add some padding on text start and end so that edges can match

1742

something. Intended to be called only from within patch_apply.

1639

1743

1640

Args:

1641

patches: Array of Patch objects.

1744

Args:

1745

patches: Array of Patch objects.

1642

1746

1643

Returns:

1644

The padding string added to each side.

1645

"""

1646

paddingLength = self.Patch_Margin

1647

nullPadding = ""

1648

for x in range(1, paddingLength + 1):

1649

nullPadding += chr(x)

1747

Returns:

1748

The padding string added to each side.

1749

"""

1750

paddingLength = self.Patch_Margin

1751

nullPadding = ""

1752

for x in range(1, paddingLength + 1):

1753

nullPadding += chr(x)

1650

1754

1651

# Bump all the patches forward.

1652

for patch in patches:

1653

patch.start1 += paddingLength

1654

patch.start2 += paddingLength

1755

# Bump all the patches forward.

1756

for patch in patches:

1757

patch.start1 += paddingLength

1758

patch.start2 += paddingLength

1655

1759

1656

# Add some padding on start of first diff.

1657

patch = patches[0]

1658

diffs = patch.diffs

1659

if not diffs or diffs[0][0] != self.DIFF_EQUAL:

1660

# Add nullPadding equality.

1661

diffs.insert(0, (self.DIFF_EQUAL, nullPadding))

1662

patch.start1 -= paddingLength # Should be 0.

1663

patch.start2 -= paddingLength # Should be 0.

1664

patch.length1 += paddingLength

1665

patch.length2 += paddingLength

1666

elif paddingLength > len(diffs[0][1]):

1667

# Grow first equality.

1668

extraLength = paddingLength - len(diffs[0][1])

1669

newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]

1670

diffs[0] = (diffs[0][0], newText)

1671

patch.start1 -= extraLength

1672

patch.start2 -= extraLength

1673

patch.length1 += extraLength

1674

patch.length2 += extraLength

1760

# Add some padding on start of first diff.

1761

patch = patches[0]

1762

diffs = patch.diffs

1763

if not diffs or diffs[0][0] != self.DIFF_EQUAL:

1764

# Add nullPadding equality.

1765

diffs.insert(0, (self.DIFF_EQUAL, nullPadding))

1766

patch.start1 -= paddingLength # Should be 0.

1767

patch.start2 -= paddingLength # Should be 0.

1768

patch.length1 += paddingLength

1769

patch.length2 += paddingLength

1770

elif paddingLength > len(diffs[0][1]):

1771

# Grow first equality.

1772

extraLength = paddingLength - len(diffs[0][1])

1773

newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1]

1774

diffs[0] = (diffs[0][0], newText)

1775

patch.start1 -= extraLength

1776

patch.start2 -= extraLength

1777

patch.length1 += extraLength

1778

patch.length2 += extraLength

1675

1779

1676

# Add some padding on end of last diff.

1677

patch = patches[-1]

1678

diffs = patch.diffs

1679

if not diffs or diffs[-1][0] != self.DIFF_EQUAL:

1680

# Add nullPadding equality.

1681

diffs.append((self.DIFF_EQUAL, nullPadding))

1682

patch.length1 += paddingLength

1683

patch.length2 += paddingLength

1684

elif paddingLength > len(diffs[-1][1]):

1685

# Grow last equality.

1686

extraLength = paddingLength - len(diffs[-1][1])

1687

newText = diffs[-1][1] + nullPadding[:extraLength]

1688

diffs[-1] = (diffs[-1][0], newText)

1689

patch.length1 += extraLength

1690

patch.length2 += extraLength

1780

# Add some padding on end of last diff.

1781

patch = patches[-1]

1782

diffs = patch.diffs

1783

if not diffs or diffs[-1][0] != self.DIFF_EQUAL:

1784

# Add nullPadding equality.

1785

diffs.append((self.DIFF_EQUAL, nullPadding))

1786

patch.length1 += paddingLength

1787

patch.length2 += paddingLength

1788

elif paddingLength > len(diffs[-1][1]):

1789

# Grow last equality.

1790

extraLength = paddingLength - len(diffs[-1][1])

1791

newText = diffs[-1][1] + nullPadding[:extraLength]

1792

diffs[-1] = (diffs[-1][0], newText)

1793

patch.length1 += extraLength

1794

patch.length2 += extraLength

1691

1795

1692

return nullPadding

1796

return nullPadding

1693

1797

1694

def patch_splitMax(self, patches):

1695

"""Look through the patches and break up any which are longer than the

1696

maximum limit of the match algorithm.

1697

Intended to be called only from within patch_apply.

1798

def patch_splitMax(self, patches):

1799

"""Look through the patches and break up any which are longer than the

1800

maximum limit of the match algorithm.

1801

Intended to be called only from within patch_apply.

1698

1802

1699

Args:

1700

patches: Array of Patch objects.

1701

"""

1702

patch_size = self.Match_MaxBits

1703

if patch_size == 0:

1704

# Python has the option of not splitting strings due to its ability

1705

# to handle integers of arbitrary precision.

1706

return

1707

for x in range(len(patches)):

1708

if patches[x].length1 <= patch_size:

1709

continue

1710

bigpatch = patches[x]

1711

# Remove the big old patch.

1712

del patches[x]

1713

x -= 1

1714

start1 = bigpatch.start1

1715

start2 = bigpatch.start2

1716

precontext = ''

1717

while len(bigpatch.diffs) != 0:

1718

# Create one of several smaller patches.

1719

patch = patch_obj()

1720

empty = True

1721

patch.start1 = start1 - len(precontext)

1722

patch.start2 = start2 - len(precontext)

1723

if precontext:

1724

patch.length1 = patch.length2 = len(precontext)

1725

patch.diffs.append((self.DIFF_EQUAL, precontext))

1803

Args:

1804

patches: Array of Patch objects.

1805

"""

1806

patch_size = self.Match_MaxBits

1807

if patch_size == 0:

1808

# Python has the option of not splitting strings due to its ability

1809

# to handle integers of arbitrary precision.

1810

return

1811

for x in range(len(patches)):

1812

if patches[x].length1 <= patch_size:

1813

continue

1814

bigpatch = patches[x]

1815

# Remove the big old patch.

1816

del patches[x]

1817

x -= 1

1818

start1 = bigpatch.start1

1819

start2 = bigpatch.start2

1820

precontext = ""

1821

while len(bigpatch.diffs) != 0:

1822

# Create one of several smaller patches.

1823

patch = patch_obj()

1824

empty = True

1825

patch.start1 = start1 - len(precontext)

1826

patch.start2 = start2 - len(precontext)

1827

if precontext:

1828

patch.length1 = patch.length2 = len(precontext)

1829

patch.diffs.append((self.DIFF_EQUAL, precontext))

1726

1830

1727

while (len(bigpatch.diffs) != 0 and

1728

patch.length1 < patch_size - self.Patch_Margin):

1729

(diff_type, diff_text) = bigpatch.diffs[0]

1730

if diff_type == self.DIFF_INSERT:

1731

# Insertions are harmless.

1732

patch.length2 += len(diff_text)

1733

start2 += len(diff_text)

1734

patch.diffs.append(bigpatch.diffs.pop(0))

1735

empty = False

1736

elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and

1737

patch.diffs[0][0] == self.DIFF_EQUAL and

1738

len(diff_text) > 2 * patch_size):

1739

# This is a large deletion. Let it pass in one chunk.

1740

patch.length1 += len(diff_text)

1741

start1 += len(diff_text)

1742

empty = False

1743

patch.diffs.append((diff_type, diff_text))

1744

del bigpatch.diffs[0]

1745

else:

1746

# Deletion or equality. Only take as much as we can stomach.

1747

diff_text = diff_text[:patch_size - patch.length1 -

1748

self.Patch_Margin]

1749

patch.length1 += len(diff_text)

1750

start1 += len(diff_text)

1751

if diff_type == self.DIFF_EQUAL:

1752

patch.length2 += len(diff_text)

1753

start2 += len(diff_text)

1754

else:

1755

empty = False

1831

while (

1832

len(bigpatch.diffs) != 0

1833

and patch.length1 < patch_size - self.Patch_Margin

1834

):

1835

(diff_type, diff_text) = bigpatch.diffs[0]

1836

if diff_type == self.DIFF_INSERT:

1837

# Insertions are harmless.

1838

patch.length2 += len(diff_text)

1839

start2 += len(diff_text)

1840

patch.diffs.append(bigpatch.diffs.pop(0))

1841

empty = False

1842

elif (

1843

diff_type == self.DIFF_DELETE

1844

and len(patch.diffs) == 1

1845

and patch.diffs[0][0] == self.DIFF_EQUAL

1846

and len(diff_text) > 2 * patch_size

1847

):

1848

# This is a large deletion. Let it pass in one chunk.

1849

patch.length1 += len(diff_text)

1850

start1 += len(diff_text)

1851

empty = False

1852

patch.diffs.append((diff_type, diff_text))

1853

del bigpatch.diffs[0]

1854

else:

1855

# Deletion or equality. Only take as much as we can stomach.

1856

diff_text = diff_text[

1857

: patch_size - patch.length1 - self.Patch_Margin

1858

]

1859

patch.length1 += len(diff_text)

1860

start1 += len(diff_text)

1861

if diff_type == self.DIFF_EQUAL:

1862

patch.length2 += len(diff_text)

1863

start2 += len(diff_text)

1864

else:

1865

empty = False

1756

1866

1757

patch.diffs.append((diff_type, diff_text))

1758

if diff_text == bigpatch.diffs[0][1]:

1759

del bigpatch.diffs[0]

1760

else:

1761

bigpatch.diffs[0] = (~~bigpatch~~.~~diffs~~[0][0],

1762

bigpatch.diffs[0][1][~~len~~(~~diff_text):])~~

1867

patch.diffs.append((diff_type, diff_text))

1868

if diff_text == bigpatch.diffs[0][1]:

1869

del bigpatch.diffs[0]

1870

else:

1871

bigpatch.diffs[0] = (

1872

bigpatch.diffs[0][0],

1873

bigpatch.diffs[0][1][len(diff_text) :],

1874

)

1763

1875

1764

# Compute the head context for the next patch.

1765

precontext = self.diff_text2(patch.diffs)

1766

precontext = precontext[-self.Patch_Margin:]

1767

# Append the end context for this patch.

1768

postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]

1769

if postcontext:

1770

patch.length1 += len(postcontext)

1771

patch.length2 += len(postcontext)

1772

if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:

1773

patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +

1774

~~postcontext~~)

1775

else:

1776

patch.diffs.append((self.DIFF_EQUAL, postcontext))

1876

# Compute the head context for the next patch.

1877

precontext = self.diff_text2(patch.diffs)

1878

precontext = precontext[-self.Patch_Margin :]

1879

# Append the end context for this patch.

1880

postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin]

1881

if postcontext:

1882

patch.length1 += len(postcontext)

1883

patch.length2 += len(postcontext)

1884

if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:

1885

patch.diffs[-1] = (

1886

self.DIFF_EQUAL,

1887

patch.diffs[-1][1] + postcontext,

1888

)

1889

else:

1890

patch.diffs.append((self.DIFF_EQUAL, postcontext))

1777

1891

1778

if not empty:

1779

x += 1

1780

patches.insert(x, patch)

1781

1782

def patch_toText(self, patches):

1783

"""Take a list of patches and return a textual representation.

1892

if not empty:

1893

x += 1

1894

patches.insert(x, patch)

1784

1895

1785

Args:

1786

patches: Array of Patch objects.

1896

def patch_toText(self, patches):

1897

"""Take a list of patches and return a textual representation.

1898

1899

Args:

1900

patches: Array of Patch objects.

1787

1901

1788

Returns:

1789

Text representation of patches.

1790

"""

1791

text = []

1792

for patch in patches:

1793

text.append(str(patch))

1794

return "".join(text)

1902

Returns:

1903

Text representation of patches.

1904

"""

1905

text = []

1906

for patch in patches:

1907

text.append(str(patch))

1908

return "".join(text)

1795

1909

1796

def patch_fromText(self, textline):

1797

"""Parse a textual representation of patches and return a list of patch

1798

objects.

1910

def patch_fromText(self, textline):

1911

"""Parse a textual representation of patches and return a list of patch

1912

objects.

1799

1913

1800

Args:

1801

textline: Text representation of patches.

1914

Args:

1915

textline: Text representation of patches.

1802

1916

1803

Returns:

1804

Array of Patch objects.

1917

Returns:

1918

Array of Patch objects.

1805

1919

1806

Raises:

1807

ValueError: If invalid input.

1808

"""

1809

if type(textline) == unicode:

1810

# Patches should be composed of a subset of ascii chars, Unicode not

1811

# required. If this encode raises UnicodeEncodeError, patch is invalid.

1812

textline = textline.encode("ascii")

1813

patches = []

1814

if not textline:

1815

return patches

1816

text = textline.split('\n')

1817

while len(text) != 0:

1818

m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])

1819

if not m:

1820

raise ValueError("Invalid patch string: " + text[0])

1821

patch = patch_obj()

1822

patches.append(patch)

1823

patch.start1 = int(m.group(1))

1824

if m.group(2) == '':

1825

patch.start1 -= 1

1826

patch.length1 = 1

1827

elif m.group(2) == '0':

1828

patch.length1 = 0

1829

else:

1830

patch.start1 -= 1

1831

patch.length1 = int(m.group(2))

1920

Raises:

1921

ValueError: If invalid input.

1922

"""

1923

if type(textline) == unicode:

1924

# Patches should be composed of a subset of ascii chars, Unicode not

1925

# required. If this encode raises UnicodeEncodeError, patch is invalid.

1926

textline = textline.encode("ascii")

1927

patches = []

1928

if not textline:

1929

return patches

1930

text = textline.split("\n")

1931

while len(text) != 0:

1932

m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])

1933

if not m:

1934

raise ValueError("Invalid patch string: " + text[0])

1935

patch = patch_obj()

1936

patches.append(patch)

1937

patch.start1 = int(m.group(1))

1938

if m.group(2) == "":

1939

patch.start1 -= 1

1940

patch.length1 = 1

1941

elif m.group(2) == "0":

1942

patch.length1 = 0

1943

else:

1944

patch.start1 -= 1

1945

patch.length1 = int(m.group(2))

1832

1946

1833

patch.start2 = int(m.group(3))

1834

if m.group(4) == '':

1835

patch.start2 -= 1

1836

patch.length2 = 1

1837

elif m.group(4) == '0':

1838

patch.length2 = 0

1839

else:

1840

patch.start2 -= 1

1841

patch.length2 = int(m.group(4))

1947

patch.start2 = int(m.group(3))

1948

if m.group(4) == "":

1949

patch.start2 -= 1

1950

patch.length2 = 1

1951

elif m.group(4) == "0":

1952

patch.length2 = 0

1953

else:

1954

patch.start2 -= 1

1955

patch.length2 = int(m.group(4))

1842

1956

1843

del text[0]

1957

del text[0]

1844

1958

1845

while len(text) != 0:

1846

if text[0]:

1847

sign = text[0][0]

1848

else:

1849

sign = ''

1850

line = urllib.parse.unquote(text[0][1:])

1851

line = line.decode("utf-8")

1852

if sign == '+':

1853

# Insertion.

1854

patch.diffs.append((self.DIFF_INSERT, line))

1855

elif sign == '-':

1856

# Deletion.

1857

patch.diffs.append((self.DIFF_DELETE, line))

1858

elif sign == ' ':

1859

# Minor equality.

1860

patch.diffs.append((self.DIFF_EQUAL, line))

1861

elif sign == '@':

1862

# Start of next patch.

1863

break

1864

elif sign == '':

1865

# Blank line? Whatever.

1866

pass

1867

else:

1868

# WTF?

1869

raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))

1870

del text[0]

1871

return patches

1959

while len(text) != 0:

1960

if text[0]:

1961

sign = text[0][0]

1962

else:

1963

sign = ""

1964

line = urllib.parse.unquote(text[0][1:])

1965

line = line.decode("utf-8")

1966

if sign == "+":

1967

# Insertion.

1968

patch.diffs.append((self.DIFF_INSERT, line))

1969

elif sign == "-":

1970

# Deletion.

1971

patch.diffs.append((self.DIFF_DELETE, line))

1972

elif sign == " ":

1973

# Minor equality.

1974

patch.diffs.append((self.DIFF_EQUAL, line))

1975

elif sign == "@":

1976

# Start of next patch.

1977

break

1978

elif sign == "":

1979

# Blank line? Whatever.

1980

pass

1981

else:

1982

# WTF?

1983

raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))

1984

del text[0]

1985

return patches

1872

1986

1873

1987

1874

1988

class patch_obj:

1875

"""Class representing one patch operation.

1876

"""

1989

"""Class representing one patch operation."""

1877

1990

1878

def __init__(self):

1879

"""Initializes with an empty list of diffs.

1880

"""

1881

self.diffs = []

1882

self.start1 = None

1883

self.start2 = None

1884

self.length1 = 0

1885

self.length2 = 0

1991

def __init__(self):

1992

"""Initializes with an empty list of diffs."""

1993

self.diffs = []

1994

self.start1 = None

1995

self.start2 = None

1996

self.length1 = 0

1997

self.length2 = 0

1886

1998

1887

def __str__(self):

1888

"""Emmulate GNU diff's format.

1889

Header: @@ -382,8 +481,9 @@

1890

Indicies are printed as 1-based, not 0-based.

1999

def __str__(self):

2000

"""Emmulate GNU diff's format.

2001

Header: @@ -382,8 +481,9 @@

2002

Indicies are printed as 1-based, not 0-based.

1891

2003

1892

Returns:

1893

The GNU diff string.

1894

"""

1895

if self.length1 == 0:

1896

coords1 = str(self.start1) + ",0"

1897

elif self.length1 == 1:

1898

coords1 = str(self.start1 + 1)

1899

else:

1900

coords1 = str(self.start1 + 1) + "," + str(self.length1)

1901

if self.length2 == 0:

1902

coords2 = str(self.start2) + ",0"

1903

elif self.length2 == 1:

1904

coords2 = str(self.start2 + 1)

1905

else:

1906

coords2 = str(self.start2 + 1) + "," + str(self.length2)

1907

text = ["@@ -", coords1, " +", coords2, " @@\n"]

1908

# Escape the body of the patch with %xx notation.

1909

for (op, data) in self.diffs:

1910

if op == diff_match_patch.DIFF_INSERT:

1911

text.append("+")

1912

elif op == diff_match_patch.DIFF_DELETE:

1913

text.append("-")

1914

elif op == diff_match_patch.DIFF_EQUAL:

1915

text.append(" ")

1916

# High ascii will raise UnicodeDecodeError. Use Unicode instead.

1917

data = data.encode("utf-8")

1918

text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")

1919

return "".join(text) No newline at end of file

2004

Returns:

2005

The GNU diff string.

2006

"""

2007

if self.length1 == 0:

2008

coords1 = str(self.start1) + ",0"

2009

elif self.length1 == 1:

2010

coords1 = str(self.start1 + 1)

2011

else:

2012

coords1 = str(self.start1 + 1) + "," + str(self.length1)

2013

if self.length2 == 0:

2014

coords2 = str(self.start2) + ",0"

2015

elif self.length2 == 1:

2016

coords2 = str(self.start2 + 1)

2017

else:

2018

coords2 = str(self.start2 + 1) + "," + str(self.length2)

2019

text = ["@@ -", coords1, " +", coords2, " @@\n"]

2020

# Escape the body of the patch with %xx notation.

2021

for op, data in self.diffs:

2022

if op == diff_match_patch.DIFF_INSERT:

2023

text.append("+")

2024

elif op == diff_match_patch.DIFF_DELETE:

2025

text.append("-")

2026

elif op == diff_match_patch.DIFF_EQUAL:

2027

text.append(" ")

2028

# High ascii will raise UnicodeDecodeError. Use Unicode instead.

2029

data = data.encode("utf-8")

2030

text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")

2031

return "".join(text)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages