refactor: Use difflib.SequenceMatcher for improved line matching logic
This commit is contained in:
parent
76e0e8789c
commit
cc25765a41
33
gtm
33
gtm
|
|
@@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import curses
|
||||
import difflib
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
|
@@ -33,30 +34,26 @@ def find_best_matching_line(reference_line, file_lines, max_lines=None):
|
|||
if line == reference_line:
|
||||
return i
|
||||
|
||||
# If no exact match, try to find the most similar line
|
||||
# If no exact match, use difflib to find the most similar line
|
||||
# Only search through a reasonable number of lines for performance
|
||||
search_lines = file_lines[:max_lines] if max_lines else file_lines
|
||||
|
||||
best_match = None
|
||||
best_score = 0
|
||||
best_match_idx = None
|
||||
best_ratio = 0.0
|
||||
|
||||
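# Create one SequenceMatcher with the reference line as seq1 and reuse it for every candidate line. NOTE(review): difflib caches its analysis of seq2, so fixing the reference as seq2 and calling set_seq1() per line would likely be faster.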
|
||||
s = difflib.SequenceMatcher(None, reference_line)
|
||||
|
||||
for i, line in enumerate(search_lines):
|
||||
# Simple similarity score: count of common characters
|
||||
score = sum(1 for a, b in zip(reference_line, line) if a == b)
|
||||
|
||||
# Adjust score based on length difference
|
||||
length_diff = abs(len(reference_line) - len(line))
|
||||
adjusted_score = score - (length_diff * 0.5)
|
||||
|
||||
if adjusted_score > best_score:
|
||||
best_score = adjusted_score
|
||||
best_match = i
|
||||
s.set_seq2(line)
|
||||
ratio = s.ratio()
|
||||
if ratio > best_ratio:
|
||||
best_ratio = ratio
|
||||
best_match_idx = i
|
||||
|
||||
# Only return a match if it's reasonably good
|
||||
# (at least 60% of the shorter string length)
|
||||
min_length = min(len(reference_line), 1) # Avoid division by zero
|
||||
if best_score > (min_length * 0.6):
|
||||
return best_match
|
||||
# Only return a match if it's reasonably good (e.g., ratio > 0.6)
|
||||
if best_ratio > 0.6:
|
||||
return best_match_idx
|
||||
|
||||
return None
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue