src/Domain/ClipSelector.cs
+23
-5
diff --git a/src/Domain/ClipSelector.cs b/src/Domain/ClipSelector.cs
index f37034e..e3e2214 100644
@@ -33,18 +33,36 @@ public sealed class ClipSelector(
cancellationToken: cancellationToken
);
var bestMatch = FindBestMatch(
return FindBestMatch(
subtitleEmbeddings.Zip(subtitles.SelectMany(s => s.Lines.Select(_ => s))),
await clippableQuotesEmbeddings.Value
);
return (bestMatch.Start, bestMatch.Duration);
}
/// <summary>
/// Finds the subtitle embedding that scores highest against the clippable quotes and
/// returns a clip window spanning that element plus a little surrounding context.
/// </summary>
/// <param name="subtitleEmbeddings">
/// Embeddings paired with the subtitle entry each belongs to. The sequence is materialized
/// once, and its order decides which elements count as neighbours of the best match.
/// </param>
/// <param name="clippableQuotesEmbeddings">
/// Reference embeddings forwarded unchanged to <see cref="CalculateSimilarity"/>.
/// </param>
/// <returns>
/// The clip start followed by the clip duration. The clip starts at the <c>Start</c> of the
/// element two positions before the best match and ends at <c>Start + Duration</c> of the
/// element one position after it; both positions are clamped to the bounds of the sequence.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="subtitleEmbeddings"/> contains no elements.
/// </exception>
private static (TimeSpan, TimeSpan) FindBestMatch(
    IEnumerable<(Embedding<float> embedding, SubtitleEntry subtitle)> subtitleEmbeddings,
    IReadOnlyList<Embedding<float>> clippableQuotesEmbeddings
)
{
    // Number of sequence elements to include before / after the best-scoring one.
    const int LeadingContext = 2;
    const int TrailingContext = 1;

    // Materialize once: the sequence is scanned for the best score and then indexed.
    var candidates = subtitleEmbeddings.ToArray();
    if (candidates.Length == 0)
    {
        throw new InvalidOperationException("No subtitle entries found, no best match possible.");
    }

    // Seed with the first element; only a strictly higher score replaces it, so the
    // earliest element wins on ties.
    var bestIndex = 0;
    var bestScore = CalculateSimilarity(candidates[0].embedding, clippableQuotesEmbeddings);
    for (var i = 1; i < candidates.Length; i++)
    {
        var score = CalculateSimilarity(candidates[i].embedding, clippableQuotesEmbeddings);
        if (bestScore < score)
        {
            bestScore = score;
            bestIndex = i;
        }
    }

    // NOTE(review): the visible call site appears to repeat each subtitle entry once per
    // line, so these offsets may count lines rather than distinct entries - confirm intent.
    var firstIndex = int.Max(0, bestIndex - LeadingContext);
    var lastIndex = int.Min(bestIndex + TrailingContext, candidates.Length - 1);

    var start = candidates[firstIndex].subtitle.Start;
    var last = candidates[lastIndex].subtitle;
    var duration = last.Start + last.Duration - start;
    return (start, duration);
}
private static float CalculateSimilarity(
Embedding<float> subtitleEmbedding,