refactor: Use difflib.SequenceMatcher for improved line matching logic

This commit is contained in:
n loewen (aider) 2025-06-08 01:16:57 +01:00
parent 76e0e8789c
commit cc25765a41
1 changed file with 15 additions and 18 deletions

33
gtm
View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import curses
import difflib
import os
import subprocess
import sys
@@ -33,30 +34,26 @@ def find_best_matching_line(reference_line, file_lines, max_lines=None):
if line == reference_line:
return i
# If no exact match, try to find the most similar line
# If no exact match, use difflib to find the most similar line
# Only search through a reasonable number of lines for performance
search_lines = file_lines[:max_lines] if max_lines else file_lines
best_match = None
best_score = 0
best_match_idx = None
best_ratio = 0.0
# Create a SequenceMatcher for the reference line, but reuse it for efficiency
s = difflib.SequenceMatcher(None, reference_line)
for i, line in enumerate(search_lines):
# Simple similarity score: count of common characters
score = sum(1 for a, b in zip(reference_line, line) if a == b)
# Adjust score based on length difference
length_diff = abs(len(reference_line) - len(line))
adjusted_score = score - (length_diff * 0.5)
if adjusted_score > best_score:
best_score = adjusted_score
best_match = i
s.set_seq2(line)
ratio = s.ratio()
if ratio > best_ratio:
best_ratio = ratio
best_match_idx = i
# Only return a match if it's reasonably good
# (at least 60% of the shorter string length)
min_length = min(len(reference_line), 1) # Avoid division by zero
if best_score > (min_length * 0.6):
return best_match
# Only return a match if it's reasonably good (e.g., ratio > 0.6)
if best_ratio > 0.6:
return best_match_idx
return None