diff --git a/gtm b/gtm index 69787b6..a662e6a 100755 --- a/gtm +++ b/gtm @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import curses +import difflib import os import subprocess import sys @@ -33,30 +34,26 @@ def find_best_matching_line(reference_line, file_lines, max_lines=None): if line == reference_line: return i - # If no exact match, try to find the most similar line + # If no exact match, use difflib to find the most similar line # Only search through a reasonable number of lines for performance search_lines = file_lines[:max_lines] if max_lines else file_lines - best_match = None - best_score = 0 + best_match_idx = None + best_ratio = 0.0 + + # Create a SequenceMatcher for the reference line, but reuse it for efficiency + s = difflib.SequenceMatcher(None, reference_line) for i, line in enumerate(search_lines): - # Simple similarity score: count of common characters - score = sum(1 for a, b in zip(reference_line, line) if a == b) - - # Adjust score based on length difference - length_diff = abs(len(reference_line) - len(line)) - adjusted_score = score - (length_diff * 0.5) - - if adjusted_score > best_score: - best_score = adjusted_score - best_match = i + s.set_seq2(line) + ratio = s.ratio() + if ratio > best_ratio: + best_ratio = ratio + best_match_idx = i - # Only return a match if it's reasonably good - # (at least 60% of the shorter string length) - min_length = min(len(reference_line), 1) # Avoid division by zero - if best_score > (min_length * 0.6): - return best_match + # Only return a match if it's reasonably good (e.g., ratio > 0.6) + if best_ratio > 0.6: + return best_match_idx return None