refactor: Use difflib.SequenceMatcher for improved line matching logic
This commit is contained in:
parent
76e0e8789c
commit
cc25765a41
33
gtm
33
gtm
|
|
@@ -1,6 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import curses
|
import curses
|
||||||
|
import difflib
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
|
@@ -33,30 +34,26 @@ def find_best_matching_line(reference_line, file_lines, max_lines=None):
|
||||||
if line == reference_line:
|
if line == reference_line:
|
||||||
return i
|
return i
|
||||||
|
|
||||||
# If no exact match, try to find the most similar line
|
# If no exact match, use difflib to find the most similar line
|
||||||
# Only search through a reasonable number of lines for performance
|
# Only search through a reasonable number of lines for performance
|
||||||
search_lines = file_lines[:max_lines] if max_lines else file_lines
|
search_lines = file_lines[:max_lines] if max_lines else file_lines
|
||||||
|
|
||||||
best_match = None
|
best_match_idx = None
|
||||||
best_score = 0
|
best_ratio = 0.0
|
||||||
|
|
||||||
|
# Create a single SequenceMatcher for the reference line and reuse it for every candidate line
|
||||||
|
s = difflib.SequenceMatcher(None, reference_line)
|
||||||
|
|
||||||
for i, line in enumerate(search_lines):
|
for i, line in enumerate(search_lines):
|
||||||
# Simple similarity score: count of common characters
|
s.set_seq2(line)
|
||||||
score = sum(1 for a, b in zip(reference_line, line) if a == b)
|
ratio = s.ratio()
|
||||||
|
if ratio > best_ratio:
|
||||||
# Adjust score based on length difference
|
best_ratio = ratio
|
||||||
length_diff = abs(len(reference_line) - len(line))
|
best_match_idx = i
|
||||||
adjusted_score = score - (length_diff * 0.5)
|
|
||||||
|
|
||||||
if adjusted_score > best_score:
|
|
||||||
best_score = adjusted_score
|
|
||||||
best_match = i
|
|
||||||
|
|
||||||
# Only return a match if it's reasonably good
|
# Only return a match if it's reasonably good (i.e., ratio > 0.6)
|
||||||
# (at least 60% of the shorter string length)
|
if best_ratio > 0.6:
|
||||||
min_length = min(len(reference_line), 1) # Avoid division by zero
|
return best_match_idx
|
||||||
if best_score > (min_length * 0.6):
|
|
||||||
return best_match
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue