📄 src/Domain/ClipSelector.cs
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;

namespace Slopper.Domain;

/// <summary>
/// Chooses a clip from a media item by comparing the embedding of every subtitle line
/// against the embeddings of the configured <see cref="ClipSelectorOptions.ClippableQuotes"/>
/// and cutting a small window around the best-scoring line.
/// </summary>
public sealed class ClipSelector(
    ISubtitleReader subtitleReader,
    IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator,
    IOptions<ClipSelectorOptions> options
)
{
    /// <summary>Number of subtitle lines kept before the best-matching line.</summary>
    private const int LeadingContextLines = 2;

    /// <summary>Number of subtitle lines kept after the best-matching line.</summary>
    private const int TrailingContextLines = 1;

    // Embeddings of the configured quotes, requested on first use and shared by every later call.
    // No caller cancellation token is passed here, so one cancelled PickClip call cannot cancel
    // the request on behalf of the other callers sharing this task.
    // NOTE(review): the Lazy stores whatever task the factory returned, so a failed first request
    // would be replayed to every later caller - confirm that is acceptable for a singleton.
    private readonly Lazy<Task<IReadOnlyList<Embedding<float>>>> clippableQuotesEmbeddings = new(async () =>
        await embeddingGenerator.GenerateAsync(options.Value.ClippableQuotes)
    );

    /// <summary>
    /// Reads the subtitles of <paramref name="media"/> and returns the time range around the
    /// subtitle line that is most similar to any of the configured clippable quotes.
    /// </summary>
    /// <param name="media">The media item whose subtitles are searched.</param>
    /// <param name="cancellationToken">
    /// Cancels reading the subtitles and embedding the subtitle lines.
    /// </param>
    /// <returns>The start offset of the clip and its duration.</returns>
    /// <exception cref="InvalidOperationException">
    /// The media item has no subtitle lines, so there is nothing to match against.
    /// </exception>
    public async Task<(TimeSpan start, TimeSpan duration)> PickClip(
        MediaItem media,
        CancellationToken cancellationToken
    )
    {
        var subtitles = await subtitleReader.ReadSubtitles(media, cancellationToken);

        // Flatten exactly once: every line keeps a reference to the entry it came from, so the
        // embeddings (one per line, in order) can be paired back with their entry without
        // enumerating the subtitle source a second time.
        var lines = subtitles
            .SelectMany(entry => entry.Lines.Select(line => (text: line, entry)))
            .ToArray();

        if (lines.Length == 0)
        {
            // Fail before spending an embedding request on an empty input.
            throw new InvalidOperationException("No subtitle entries found, no best match possible.");
        }

        var subtitleEmbeddings = await embeddingGenerator.GenerateAsync(
            lines.Select(line => line.text),
            cancellationToken: cancellationToken
        );

        return FindBestMatch(
            subtitleEmbeddings.Zip(lines, (embedding, line) => (embedding, subtitle: line.entry)),
            await clippableQuotesEmbeddings.Value
        );
    }

    /// <summary>
    /// Finds the highest-scoring subtitle line and returns the time range spanning from
    /// <see cref="LeadingContextLines"/> lines before it to the end of the entry
    /// <see cref="TrailingContextLines"/> line after it, clamped to the available lines.
    /// </summary>
    /// <param name="subtitleEmbeddings">One (embedding, owning entry) pair per subtitle line, in order.</param>
    /// <param name="clippableQuotesEmbeddings">Embeddings of the configured quotes.</param>
    /// <returns>The clip start and the clip duration.</returns>
    /// <exception cref="InvalidOperationException"><paramref name="subtitleEmbeddings"/> is empty.</exception>
    private static (TimeSpan, TimeSpan) FindBestMatch(
        IEnumerable<(Embedding<float> embedding, SubtitleEntry subtitle)> subtitleEmbeddings,
        IReadOnlyList<Embedding<float>> clippableQuotesEmbeddings
    )
    {
        var candidates = subtitleEmbeddings.ToArray();
        if (candidates.Length == 0)
        {
            throw new InvalidOperationException("No subtitle entries found, no best match possible.");
        }

        // Strict '<' keeps the earliest line when several lines share the top score.
        var bestIndex = 0;
        var bestScore = CalculateSimilarity(candidates[0].embedding, clippableQuotesEmbeddings);
        for (var i = 1; i < candidates.Length; i++)
        {
            var score = CalculateSimilarity(candidates[i].embedding, clippableQuotesEmbeddings);
            if (bestScore < score)
            {
                bestScore = score;
                bestIndex = i;
            }
        }

        // NOTE(review): candidates holds one element per subtitle *line*, so for multi-line
        // entries this window may cover fewer distinct entries than the constants suggest -
        // confirm whether the context is meant to be counted in lines or in entries.
        var firstIndex = int.Max(0, bestIndex - LeadingContextLines);
        var lastIndex = int.Min(bestIndex + TrailingContextLines, candidates.Length - 1);

        var start = candidates[firstIndex].subtitle.Start;
        var last = candidates[lastIndex].subtitle;
        var duration = last.Start + last.Duration - start;
        return (start, duration);
    }

    /// <summary>
    /// Scores a subtitle line as its highest similarity to any of the quote embeddings.
    /// </summary>
    private static float CalculateSimilarity(
        Embedding<float> subtitleEmbedding,
        IReadOnlyList<Embedding<float>> clippableQuotesEmbeddings
    ) => clippableQuotesEmbeddings.Max(quoteEmbedding => CalculateEmbeddingSimilarity(subtitleEmbedding, quoteEmbedding));

    /// <summary>
    /// Computes the cosine similarity of two embedding vectors. When the vectors differ in
    /// length only the leading components both vectors have are used; when either vector has
    /// zero magnitude the result is 0 instead of a division by zero.
    /// </summary>
    private static float CalculateEmbeddingSimilarity(Embedding<float> a, Embedding<float> b)
    {
        var vectorA = a.Vector.Span;
        var vectorB = b.Vector.Span;

        float dotProduct = 0.0f;
        float magnitudeA = 0.0f;
        float magnitudeB = 0.0f;

        int count = int.Min(vectorA.Length, vectorB.Length);
        for (int i = 0; i < count; i++)
        {
            dotProduct += vectorA[i] * vectorB[i];
            magnitudeA += vectorA[i] * vectorA[i];
            magnitudeB += vectorB[i] * vectorB[i];
        }

        if (magnitudeA <= 0.0f || magnitudeB <= 0.0f)
        {
            return 0.0f;
        }

        return dotProduct / (float.Sqrt(magnitudeA) * float.Sqrt(magnitudeB));
    }
}

/// <summary>
/// Settings for <see cref="ClipSelector"/>.
/// </summary>
public sealed class ClipSelectorOptions
{
    /// <summary>
    /// The quotes whose embeddings subtitle lines are compared against.
    /// Must be present and contain at least one element.
    /// </summary>
    [Required]
    [MinLength(1)]
    public required IReadOnlyList<string> ClippableQuotes { get; set; }
}

/// <summary>
/// Validator for <see cref="ClipSelectorOptions"/>. The type is declared <c>partial</c> with
/// no members of its own; the validation logic is expected to come from the code generated
/// for the <c>[OptionsValidator]</c> attribute.
/// </summary>
[OptionsValidator]
internal sealed partial class ClipSelectorOptionsValidator : IValidateOptions<ClipSelectorOptions>
{
}

/// <summary>
/// Dependency-injection registration helpers for <see cref="ClipSelector"/>.
/// </summary>
public static class ClipSelectorServiceCollectionExtensions
{
    extension(IServiceCollection services)
    {
        /// <summary>
        /// Registers <see cref="ClipSelector"/> as a singleton, binds
        /// <see cref="ClipSelectorOptions"/> to the <c>ClipSelector</c> configuration section,
        /// and registers the options validator with validation requested on start.
        /// </summary>
        /// <returns>The same service collection, to allow chaining.</returns>
        public IServiceCollection AddClipSelector()
        {
            // Options: bind from configuration and request validation on start.
            services
                .AddOptions<ClipSelectorOptions>()
                .BindConfiguration("ClipSelector")
                .ValidateOnStart();

            // Validator that the options pipeline resolves for ClipSelectorOptions.
            services.AddTransient<IValidateOptions<ClipSelectorOptions>, ClipSelectorOptionsValidator>();

            // AddSingleton hands back the collection it was called on.
            return services.AddSingleton<ClipSelector>();
        }
    }
}