package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/stuckj/mkvdup/internal/dedup"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// sourceGroup represents a set of files sharing the same source directory.
// Batch mode groups by source so the expensive indexing step runs once per
// directory rather than once per file.
type sourceGroup struct {
	sourceDir string // the shared SourceDir value of all files in this group
	indices   []int  // indices into the manifest Files slice
}
// groupBySource groups batch manifest files by their SourceDir.
// Groups are returned in first-seen order, and file indices within each group
// preserve their original manifest order.
func groupBySource(files []dedup.BatchManifestFile) []sourceGroup {
	var result []sourceGroup
	position := make(map[string]int) // sourceDir -> index into result
	for idx, file := range files {
		pos, known := position[file.SourceDir]
		if !known {
			// First time this source directory appears: start a new group.
			pos = len(result)
			position[file.SourceDir] = pos
			result = append(result, sourceGroup{sourceDir: file.SourceDir})
		}
		result[pos].indices = append(result[pos].indices, idx)
	}
	return result
}
// codecMismatchAction controls how reportCodecMismatches handles a mismatch.
type codecMismatchAction int

const (
	codecMismatchPrompt   codecMismatchAction = iota // interactive: prompt user for confirmation
	codecMismatchContinue                            // non-interactive: warn and continue
	codecMismatchSkip                                // skip: warn and signal skip
)
// reportCodecMismatches prints codec mismatch warnings to stderr and then
// reacts according to action: interactively prompt the user, continue without
// prompting (still logging the warning), or signal a skip. A non-nil error is
// returned only in prompt mode, when the user declines to continue.
func reportCodecMismatches(mismatches []source.CodecMismatch, action codecMismatchAction) error {
	if len(mismatches) == 0 {
		return nil
	}
	// Warnings go to stderr so they stay visible even in quiet mode.
	printWarnln()
	printWarnln(" WARNING: Codec mismatch detected")
	for _, mm := range mismatches {
		names := make([]string, 0, len(mm.SourceCodecs))
		for _, codec := range mm.SourceCodecs {
			names = append(names, source.CodecTypeName(codec))
		}
		printWarn(" MKV %s: %s (%s)\n", mm.TrackType, source.CodecTypeName(mm.MKVCodecType), mm.MKVCodecID)
		printWarn(" Source %s: %s\n", mm.TrackType, strings.Join(names, ", "))
	}
	printWarnln()
	printWarnln(" Deduplication may produce poor results if the MKV was transcoded.")
	if action == codecMismatchSkip {
		printWarnln(" Skipping (--skip-codec-mismatch)...")
		printWarnln()
		return nil
	}
	// Prompt mode degrades to continue when stdin is not a terminal.
	if action == codecMismatchContinue || !isTerminal() {
		printWarnln(" Continuing (non-interactive mode)...")
		printWarnln()
		return nil
	}
	fmt.Print("\n Continue anyway? [y/N]: ")
	var answer string
	fmt.Scanln(&answer)
	answer = strings.ToLower(strings.TrimSpace(answer))
	if answer == "y" || answer == "yes" {
		fmt.Println()
		return nil
	}
	return fmt.Errorf("aborted due to codec mismatch")
}
// createBatch processes multiple MKVs from a batch manifest.
// Files are grouped by source directory so each source is indexed once.
// If skipCodecMismatch is true, MKVs with codec mismatches are skipped instead
// of processed. Returns an error when the manifest cannot be read, is empty,
// or when there were non-skipped files and every one of them failed.
func createBatch(manifestPath string, warnThreshold float64, skipCodecMismatch bool) error {
	totalStart := time.Now()
	manifest, err := dedup.ReadBatchManifest(manifestPath)
	if err != nil {
		return err
	}
	// Guard against an empty manifest: the single-source banner below reads
	// groups[0].sourceDir, which would panic with an index-out-of-range when
	// there are no files (len(groups) > 1 is false, so the else branch runs).
	if len(manifest.Files) == 0 {
		return fmt.Errorf("batch manifest contains no files")
	}
	groups := groupBySource(manifest.Files)
	multiSource := len(groups) > 1
	if multiSource {
		printInfo("Batch create: %d %s from %d %s\n\n",
			len(manifest.Files), plural(len(manifest.Files), "file", "files"),
			len(groups), plural(len(groups), "source", "sources"))
	} else {
		printInfo("Batch create: %d %s from %s\n\n",
			len(manifest.Files), plural(len(manifest.Files), "file", "files"), groups[0].sourceDir)
	}
	// results and skipReasons are indexed by manifest file position.
	results := make([]*createResult, len(manifest.Files))
	skipReasons := make([]string, len(manifest.Files))
	var totalIndexDuration time.Duration
	processed := 0
	for gi, g := range groups {
		if multiSource {
			if gi > 0 {
				printInfoln()
			}
			fileWord := "files"
			if len(g.indices) == 1 {
				fileWord = "file"
			}
			printInfo("--- Source %d/%d: %s (%d %s) ---\n", gi+1, len(groups), g.sourceDir, len(g.indices), fileWord)
		}
		// Pre-check: skip files whose output already exists (resuming interrupted batch)
		for _, fi := range g.indices {
			f := manifest.Files[fi]
			if _, err := os.Stat(f.Output); err == nil {
				skipReasons[fi] = "output exists"
			}
		}
		// Pre-check: detect source codecs and warn about incompatible MKVs
		// before the expensive indexing step.
		sourceCodecs, codecErr := source.DetectSourceCodecsFromDir(g.sourceDir)
		if codecErr != nil {
			// Fail open: detection failure only suppresses the pre-check.
			if vw := verboseWriter(); vw != nil {
				fmt.Fprintf(vw, "Note: could not detect source codecs for %s: %v\n", g.sourceDir, codecErr)
			}
			printInfoln()
		} else {
			for _, fi := range g.indices {
				if skipReasons[fi] != "" {
					continue
				}
				f := manifest.Files[fi]
				codecParser, err := mkv.NewParser(f.MKV)
				if err != nil {
					if vw := verboseWriter(); vw != nil {
						fmt.Fprintf(vw, "Note: skipping codec pre-check for %s: %v\n", filepath.Base(f.MKV), err)
					}
					continue
				}
				if err := codecParser.ParseTracksOnly(); err != nil {
					codecParser.Close()
					if vw := verboseWriter(); vw != nil {
						fmt.Fprintf(vw, "Note: skipping codec pre-check for %s: %v\n", filepath.Base(f.MKV), err)
					}
					continue
				}
				mismatches := source.CheckCodecCompatibility(codecParser.Tracks(), sourceCodecs)
				codecParser.Close()
				if skipCodecMismatch && len(mismatches) > 0 {
					// Skip action only prints; its error return is always nil.
					reportCodecMismatches(mismatches, codecMismatchSkip)
					skipReasons[fi] = "codec mismatch"
					continue
				}
				if err := reportCodecMismatches(mismatches, codecMismatchContinue); err != nil {
					return err
				}
			}
			printInfoln()
		}
		// Check if all files in this group are already skipped — skip indexing entirely
		allSkipped := true
		for _, fi := range g.indices {
			if skipReasons[fi] == "" {
				allSkipped = false
				break
			}
		}
		if allSkipped {
			for _, fi := range g.indices {
				processed++
				f := manifest.Files[fi]
				printInfo("\n[%d/%d] %s\n", processed, len(manifest.Files), f.MKV)
				results[fi] = newSkipResult(f.MKV, f.Output, skipReasons[fi])
				printSkipStatus(results[fi])
			}
			continue
		}
		// Index this source directory (the expensive step, done once per group)
		indexLabel := "Indexing source directory..."
		if multiSource {
			indexLabel = fmt.Sprintf("Indexing source %d/%d...", gi+1, len(groups))
		}
		indexStart := time.Now()
		indexer, index, err := buildSourceIndex(g.sourceDir, indexLabel)
		totalIndexDuration += time.Since(indexStart)
		if err != nil {
			printWarn(" ERROR indexing %s: %v\n", g.sourceDir, err)
			// Mark non-skipped files in this group as failed
			for _, fi := range g.indices {
				processed++
				f := manifest.Files[fi]
				printInfo("\n[%d/%d] %s\n", processed, len(manifest.Files), f.MKV)
				if skipReasons[fi] != "" {
					results[fi] = newSkipResult(f.MKV, f.Output, skipReasons[fi])
					printSkipStatus(results[fi])
				} else {
					results[fi] = &createResult{MkvPath: f.MKV, Err: fmt.Errorf("index %s: %w", g.sourceDir, err)}
				}
			}
			if gi < len(groups)-1 {
				printWarnln(" Continuing with remaining sources...")
			}
			continue
		}
		// Process files in this group
		for _, fi := range g.indices {
			processed++
			f := manifest.Files[fi]
			printInfo("\n[%d/%d] %s\n", processed, len(manifest.Files), f.MKV)
			if skipReasons[fi] != "" {
				results[fi] = newSkipResult(f.MKV, f.Output, skipReasons[fi])
				printSkipStatus(results[fi])
				continue
			}
			results[fi] = createDedupWithIndex(f.MKV, f.SourceDir, f.Output, f.Name, indexer, index, 1, 4, true, skipCodecMismatch)
			r := results[fi]
			if r.Skipped {
				printSkipStatus(r)
			} else if r.Err != nil {
				printWarn(" ERROR: %v\n", r.Err)
				if processed < len(manifest.Files) {
					printWarnln(" Continuing with remaining files...")
				}
			} else if r.VerifyErr != nil {
				// Verification error messages are already printed by createDedupWithIndex
				if processed < len(manifest.Files) {
					printWarnln(" Continuing with remaining files...")
				}
			} else {
				printInfo(" MKV: %s bytes | Dedup: %s bytes | Savings: %.1f%% | Time: %v\n",
					formatInt(r.MkvSize), formatInt(r.DedupSize), r.Savings, r.Duration.Round(time.Second))
			}
		}
		index.Close()
	}
	// Print summary
	printBatchSummary(results, totalIndexDuration, totalStart, warnThreshold)
	// Return error only if there were non-skipped files and all of them failed.
	// All-skipped batches (e.g., codec mismatch) are not considered failures.
	// "output exists" (cached) files count as successes — they represent prior
	// successful runs and are shown as "OK [cached]" in the summary.
	hasNonSkipped := false
	anyNonSkippedSucceeded := false
	for _, r := range results {
		isSkipped := r.Skipped && r.SkipReason != "output exists"
		if isSkipped {
			continue
		}
		hasNonSkipped = true
		if r.Err == nil && r.VerifyErr == nil {
			anyNonSkippedSucceeded = true
			break
		}
	}
	if hasNonSkipped && !anyNonSkippedSucceeded {
		return fmt.Errorf("batch create completed with errors")
	}
	return nil
}
// newSkipResult builds a createResult marking mkvPath as skipped for the given
// reason. When the reason is "output exists", it stats the existing MKV and
// dedup files so the summary can still show sizes and savings.
func newSkipResult(mkvPath, outputPath, reason string) *createResult {
	res := &createResult{MkvPath: mkvPath, Skipped: true, SkipReason: reason}
	if reason != "output exists" {
		return res
	}
	res.OutputPath = outputPath
	if info, err := os.Stat(mkvPath); err == nil {
		res.MkvSize = info.Size()
	}
	outInfo, err := os.Stat(outputPath)
	if err != nil {
		return res
	}
	res.DedupSize = outInfo.Size()
	// Savings is only meaningful when the MKV size is known and non-zero.
	if res.MkvSize > 0 {
		res.Savings = float64(res.MkvSize-res.DedupSize) / float64(res.MkvSize) * 100
	}
	return res
}
// printSkipStatus prints the per-file skip message during batch processing.
// "output exists" skips with known file sizes also show dedup stats.
func printSkipStatus(r *createResult) {
	showStats := r.SkipReason == "output exists" && r.MkvSize > 0
	if !showStats {
		printInfo(" Skipping (%s)\n", r.SkipReason)
		return
	}
	printInfo(" Skipping (%s): %s bytes | Dedup: %s bytes | Savings: %.1f%%\n",
		r.SkipReason, formatInt(r.MkvSize), formatInt(r.DedupSize), r.Savings)
}
// printBatchSummary prints the aggregate results of a batch create operation:
// one status line per file, a succeeded/total tally with optional qualifiers,
// and a warning list for files whose savings fell below warnThreshold.
func printBatchSummary(results []*createResult, indexDuration time.Duration, totalStart time.Time, warnThreshold float64) {
	printInfoln()
	printInfoln("=== Batch Results ===")
	printInfo("Total time: %v (indexing: %v)\n\n", time.Since(totalStart), indexDuration)
	succeeded := 0
	cached := 0       // skipped because output already existed (counts as success)
	skipped := 0      // skipped for any other reason (e.g., codec mismatch)
	verifyFailed := 0 // output written but reconstruction verification failed
	var lowSavings []string
	for _, r := range results {
		if r.Skipped && r.SkipReason == "output exists" {
			// Already-processed files: show as OK with stats
			cached++
			if r.OutputPath != "" {
				printInfo(" OK %s -> %s (%.1f%% savings) [cached]\n", r.MkvPath, filepath.Base(r.OutputPath), r.Savings)
			} else {
				printInfo(" OK %s [cached]\n", r.MkvPath)
			}
			// Only flag low savings when file sizes were actually available.
			if r.Savings < warnThreshold && r.MkvSize > 0 {
				lowSavings = append(lowSavings, fmt.Sprintf(" %s: %.1f%% savings", r.MkvPath, r.Savings))
			}
		} else if r.Skipped {
			printInfo(" SKIP %s: %s\n", r.MkvPath, r.SkipReason)
			skipped++
		} else if r.Err != nil {
			printWarn(" FAIL %s: %v\n", r.MkvPath, r.Err)
		} else if r.VerifyErr != nil {
			printWarn(" FAIL %s: verification failed: %v\n", r.MkvPath, r.VerifyErr)
			verifyFailed++
		} else {
			printInfo(" OK %s -> %s (%.1f%% savings)\n", r.MkvPath, filepath.Base(r.OutputPath), r.Savings)
			if r.Savings < warnThreshold {
				lowSavings = append(lowSavings, fmt.Sprintf(" %s: %.1f%% savings", r.MkvPath, r.Savings))
			}
		}
		// A file counts as succeeded when it completed cleanly or was cached
		// from a prior successful run.
		if (!r.Skipped && r.Err == nil && r.VerifyErr == nil) || (r.Skipped && r.SkipReason == "output exists") {
			succeeded++
		}
	}
	// Build summary line with optional qualifiers
	var qualifiers []string
	if cached > 0 {
		qualifiers = append(qualifiers, fmt.Sprintf("%d cached", cached))
	}
	if verifyFailed > 0 {
		qualifiers = append(qualifiers, fmt.Sprintf("%d verification failed", verifyFailed))
	}
	if skipped > 0 {
		qualifiers = append(qualifiers, fmt.Sprintf("%d skipped", skipped))
	}
	if len(qualifiers) > 0 {
		printInfo("\nSucceeded: %d/%d (%s)\n", succeeded, len(results), strings.Join(qualifiers, ", "))
	} else {
		printInfo("\nSucceeded: %d/%d\n", succeeded, len(results))
	}
	// Low-savings warning is suppressed in quiet mode.
	if !quiet && len(lowSavings) > 0 {
		printInfo("\nWARNING: %d %s with space savings below %.0f%%:\n", len(lowSavings), plural(len(lowSavings), "file", "files"), warnThreshold)
		for _, s := range lowSavings {
			printInfoln(s)
		}
		printInfoln(" This may indicate wrong source, transcoded MKV, or very small MKV file.")
	}
}
package main
import (
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/stuckj/mkvdup/internal/dedup"
"github.com/stuckj/mkvdup/internal/matcher"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// osRemove and osRename are wrappers for testing. Override in tests to
// simulate filesystem errors without touching real files. Production code
// must call these instead of os.Remove/os.Rename in verification cleanup.
var osRemove = os.Remove
var osRename = os.Rename
// parseMKVWithProgress parses an MKV file while driving a progress bar.
// phasePrefix labels the bar (e.g., "Phase 3/6: Parsing MKV file...").
// On success it returns the parser (caller must Close it) along with the
// elapsed parse time.
func parseMKVWithProgress(mkvPath, phasePrefix string) (*mkv.Parser, time.Duration, error) {
	p, err := mkv.NewParser(mkvPath)
	if err != nil {
		return nil, 0, fmt.Errorf("create parser: %w", err)
	}
	bar := newProgressBar(phasePrefix, p.Size(), "bytes")
	began := time.Now()
	parseErr := p.Parse(func(done, _ int64) {
		bar.Update(done)
	})
	if parseErr != nil {
		// Clean up both the bar and the parser on failure.
		bar.Cancel()
		p.Close()
		return nil, 0, fmt.Errorf("parse MKV: %w", parseErr)
	}
	elapsed := time.Since(began)
	bar.Finish()
	return p, elapsed, nil
}
// createResult holds per-file statistics from a create operation.
// Skipped == false with nil Err and VerifyErr indicates full success.
type createResult struct {
	MkvPath        string        // input MKV path
	OutputPath     string        // written .mkvdup path (may become *.failed after a verify failure)
	VirtualName    string        // virtual filename recorded in the config file
	MkvSize        int64         // size of the input MKV in bytes
	DedupSize      int64         // size of the written dedup file in bytes
	MatchedBytes   int64         // bytes matched against the source index
	UnmatchedBytes int64         // bytes stored verbatim as delta data
	MatchedPackets int           // number of packets matched against the source
	TotalPackets   int           // total packets parsed from the MKV
	IndexEntries   int           // number of match-result index entries
	Savings        float64       // space savings as a percentage of MkvSize
	Duration       time.Duration // wall-clock processing time for this file
	Err            error         // fatal processing error, if any
	VerifyErr      error         // non-nil if post-create verification failed
	Skipped        bool          // true when file was skipped (e.g., codec mismatch, output exists)
	SkipReason     string        // reason for skipping (shown in summary)
}
// buildSourceIndex indexes a source directory and returns the indexer plus the
// resulting index. This is the expensive step that should only happen once in
// batch mode. phasePrefix labels the progress bar
// (e.g., "Phase 2/6: Building source index...").
func buildSourceIndex(sourceDir, phasePrefix string) (*source.Indexer, *source.Index, error) {
	idxr, err := source.NewIndexer(sourceDir, source.DefaultWindowSize)
	if err != nil {
		return nil, nil, fmt.Errorf("create indexer: %w", err)
	}
	idxr.SetVerboseWriter(verboseWriter())
	// The total size is unknown until Build reports it through the callback,
	// so start the bar at 0 and adopt the first non-zero total seen.
	bar := newProgressBar(phasePrefix, 0, "bytes")
	buildErr := idxr.Build(func(done, total int64) {
		if total > 0 && bar.total == 0 {
			bar.total = total
		}
		bar.Update(done)
	})
	if buildErr != nil {
		bar.Cancel()
		return nil, nil, fmt.Errorf("build index: %w", buildErr)
	}
	bar.Finish()
	idx := idxr.Index()
	printInfo(" Indexed %d hashes\n", len(idx.HashToLocations))
	if idx.UsesESOffsets {
		printInfo(" (Using ES-aware indexing for %v)\n", idxr.SourceType())
	}
	return idxr, idx, nil
}
// checkCodecCompatibilityFromDir performs a lightweight codec check using only
// the source directory (no index needed), so it can run before the expensive
// indexing step. Detection failures are logged (verbose only) and treated as
// "no mismatch".
func checkCodecCompatibilityFromDir(tracks []mkv.Track, sourceDir string, nonInteractive bool) error {
	codecs, err := source.DetectSourceCodecsFromDir(sourceDir)
	if err != nil {
		// Fail open: inability to detect codecs should not block creation.
		if vw := verboseWriter(); vw != nil {
			fmt.Fprintf(vw, " Note: could not detect source codecs: %v\n", err)
		}
		return nil
	}
	action := codecMismatchContinue
	if !nonInteractive {
		action = codecMismatchPrompt
	}
	return reportCodecMismatches(source.CheckCodecCompatibility(tracks, codecs), action)
}
// createDedupWithIndex processes a single MKV using a pre-built source index.
// It handles parsing, matching, writing, and verification.
// phaseStart and phaseTotal control phase numbering (e.g., 3,6 for single create; 1,4 for batch).
// If nonInteractive is true, codec mismatch warnings do not prompt the user.
// If skipCodecMismatch is true, the result is marked as Skipped on codec mismatch instead of continuing.
// All failures are reported through the returned createResult (Err/VerifyErr).
func createDedupWithIndex(mkvPath, sourceDir, outputPath, virtualName string,
	indexer *source.Indexer, index *source.Index, phaseStart, phaseTotal int, nonInteractive, skipCodecMismatch bool) *createResult {
	start := time.Now()
	result := &createResult{
		MkvPath:     mkvPath,
		OutputPath:  outputPath,
		VirtualName: virtualName,
	}
	// phaseLabel formats "Phase N/M: ..." labels relative to phaseStart.
	phaseLabel := func(offset int, label string) string {
		return fmt.Sprintf("Phase %d/%d: %s", phaseStart+offset, phaseTotal, label)
	}
	// Parse MKV
	parser, _, err := parseMKVWithProgress(mkvPath, phaseLabel(0, "Parsing MKV file..."))
	if err != nil {
		result.Err = err
		return result
	}
	defer parser.Close()
	// Fallback codec check using the index (in case the pre-indexing directory-based
	// check was skipped, e.g. detection failure or batch mode with undetectable codecs)
	sourceCodecs, codecErr := source.DetectSourceCodecs(index)
	if codecErr == nil {
		mismatches := source.CheckCodecCompatibility(parser.Tracks(), sourceCodecs)
		if skipCodecMismatch && len(mismatches) > 0 {
			// Skip action only prints; its error return is always nil.
			reportCodecMismatches(mismatches, codecMismatchSkip)
			result.Skipped = true
			result.SkipReason = "codec mismatch"
			return result
		}
		action := codecMismatchPrompt
		if nonInteractive {
			action = codecMismatchContinue
		}
		if err := reportCodecMismatches(mismatches, action); err != nil {
			result.Err = err
			return result
		}
	}
	// Calculate MKV checksum (stored in the dedup header for later validation)
	printInfo(" Calculating MKV checksum...")
	mkvChecksum, err := calculateFileChecksum(mkvPath)
	if err != nil {
		result.Err = fmt.Errorf("calculate MKV checksum: %w", err)
		return result
	}
	printInfo(" done\n")
	// Match packets
	m, err := matcher.NewMatcher(index)
	if err != nil {
		result.Err = fmt.Errorf("create matcher: %w", err)
		return result
	}
	defer m.Close()
	m.SetVerboseWriter(verboseWriter())
	matchBar := newProgressBar(phaseLabel(1, "Matching packets..."), int64(len(parser.Packets())), "packets")
	matchResult, err := m.Match(mkvPath, parser.Packets(), parser.Tracks(), func(processed, total int) {
		matchBar.Update(int64(processed))
	})
	if err != nil {
		matchBar.Cancel()
		result.Err = fmt.Errorf("match: %w", err)
		return result
	}
	defer matchResult.Close()
	matchBar.Finish()
	// Write dedup file
	writer, err := dedup.NewWriter(outputPath)
	if err != nil {
		result.Err = fmt.Errorf("create dedup writer: %w", err)
		return result
	}
	defer writer.Close()
	writer.SetHeader(parser.Size(), mkvChecksum, indexer.SourceType())
	writer.SetCreatorVersion("mkvdup " + version)
	writer.SetSourceFiles(index.Files)
	// For sources with ES offsets, decide between V3 (convert to raw) and V4 (range maps).
	// V4 stores ES offsets with embedded range maps for ES-to-raw translation at read time.
	// V3 converts ES offsets to raw file offsets at write time (simpler, smaller files).
	// V4 is used for Blu-ray (TS packet structure makes V3 impractical) and for DVDs
	// with LPCM audio (byte-swap pairs can straddle PES boundaries, requiring contiguous
	// ES reads that only range maps provide). Non-LPCM DVDs use V3 for fastest reads.
	var esConverters []source.ESRangeConverter
	if index.UsesESOffsets && len(index.ESReaders) > 0 {
		// Check if any matched entry uses LPCM (requires range maps for correct byte-swap).
		hasLPCM := false
		for _, e := range matchResult.Entries {
			if e.IsLPCM {
				hasLPCM = true
				break
			}
		}
		useRangeMaps := indexer.SourceType() == source.TypeBluray || hasLPCM
		if useRangeMaps {
			// V4: use range maps (preserves ES offsets in entries)
			// Only include range maps for streams actually referenced by matched entries.
			type streamKey struct {
				fileIndex        uint16
				isVideo          bool
				audioSubStreamID byte
			}
			usedStreams := make(map[streamKey]bool)
			for _, e := range matchResult.Entries {
				// Source 0 means "no source file"; entry indices are 1-based.
				if e.Source == 0 {
					continue
				}
				usedStreams[streamKey{e.Source - 1, e.IsVideo, e.AudioSubStreamID}] = true
			}
			// Collect the set of file indices that need range maps
			usedFiles := make(map[uint16]bool)
			for k := range usedStreams {
				usedFiles[k.fileIndex] = true
			}
			// Sort file indices for deterministic output.
			sortedFiles := make([]uint16, 0, len(usedFiles))
			for fi := range usedFiles {
				sortedFiles = append(sortedFiles, fi)
			}
			sort.Slice(sortedFiles, func(i, j int) bool { return sortedFiles[i] < sortedFiles[j] })
			var rangeMaps []dedup.RangeMapData
			for _, fi := range sortedFiles {
				i := int(fi)
				if i >= len(index.ESReaders) {
					continue
				}
				reader := index.ESReaders[i]
				provider, ok := reader.(source.PESRangeProvider)
				if !ok {
					continue
				}
				rm := dedup.RangeMapData{
					FileIndex: fi,
				}
				// Only include video range map if video entries reference this file
				if usedStreams[streamKey{fi, true, 0}] {
					rm.VideoRanges = provider.FilteredVideoRanges()
				}
				// If this reader provides offset conversion (e.g., ISO adapter),
				// set the converter for range map encoding.
				if adj, ok := reader.(source.FileOffsetAdjuster); ok {
					rm.OffsetFunc = adj.FileOffsetConverter()
				}
				// Only include audio sub-stream range maps that are actually used
				for _, subID := range provider.AudioSubStreams() {
					if usedStreams[streamKey{fi, false, subID}] {
						rm.AudioStreams = append(rm.AudioStreams, dedup.AudioRangeData{
							SubStreamID: subID,
							Ranges:      provider.FilteredAudioRanges(subID),
						})
					}
				}
				rangeMaps = append(rangeMaps, rm)
			}
			if vw := verboseWriter(); vw != nil {
				fmt.Fprintf(vw, " Range maps: %d/%d source files used, %d streams referenced\n",
					len(usedFiles), len(index.ESReaders), len(usedStreams))
			}
			if len(rangeMaps) > 0 {
				writer.SetRangeMaps(rangeMaps)
			}
		} else {
			// V3: convert ES offsets to raw offsets for DVDs
			esConverters = make([]source.ESRangeConverter, len(index.ESReaders))
			for i, r := range index.ESReaders {
				if converter, ok := r.(source.ESRangeConverter); ok {
					esConverters[i] = converter
				}
			}
		}
	}
	if err := writer.SetMatchResult(matchResult, esConverters); err != nil {
		// Remove the partially-written output on failure.
		os.Remove(outputPath)
		result.Err = fmt.Errorf("set match result: %w", err)
		return result
	}
	// Pre-encode range maps (CPU-intensive) before the progress-tracked write.
	rangeMapSize, err := writer.EncodeRangeMaps()
	if err != nil {
		os.Remove(outputPath)
		result.Err = fmt.Errorf("encode range maps: %w", err)
		return result
	}
	if rangeMapSize > 0 {
		printInfo(" Range maps encoded: %s bytes\n", formatInt(rangeMapSize))
	}
	writeBar := newProgressBar(phaseLabel(2, "Writing dedup file..."), 0, "bytes")
	if err := writer.WriteWithProgress(func(written, total int64) {
		// Total is unknown until the first callback; adopt it lazily.
		if writeBar.total == 0 && total > 0 {
			writeBar.total = total
		}
		writeBar.Update(written)
	}); err != nil {
		writeBar.Cancel()
		os.Remove(outputPath)
		result.Err = fmt.Errorf("write dedup file: %w", err)
		return result
	}
	writeBar.Finish()
	// Write config file (failure is non-fatal; only a warning is printed)
	configPath := outputPath + ".yaml"
	if err := dedup.WriteConfig(configPath, virtualName, outputPath, sourceDir); err != nil {
		printInfo(" Warning: failed to write config file: %v\n", err)
	} else {
		printInfo(" Config: %s\n", configPath)
	}
	// Verify reconstruction (may rename the output to *.failed on failure)
	verifyPrefix := phaseLabel(3, "Verifying reconstruction...")
	outputPath = handleVerifyResult(outputPath, sourceDir, mkvPath, index, verifyPrefix, result)
	// Populate result
	result.MkvSize = parser.Size()
	result.MatchedBytes = matchResult.MatchedBytes
	result.UnmatchedBytes = matchResult.UnmatchedBytes
	result.MatchedPackets = matchResult.MatchedPackets
	result.TotalPackets = matchResult.TotalPackets
	result.IndexEntries = len(matchResult.Entries)
	// Stat error is deliberately ignored: size/savings stay zero if the
	// output was renamed or removed.
	dedupInfo, _ := os.Stat(outputPath)
	if dedupInfo != nil {
		result.DedupSize = dedupInfo.Size()
		result.Savings = float64(result.MkvSize-result.DedupSize) / float64(result.MkvSize) * 100
	}
	result.Duration = time.Since(start)
	return result
}
// handleVerifyResult runs post-write verification and handles failures.
// On failure it removes the now-stale config file, renames the broken .mkvdup
// to .mkvdup.failed (replacing any previous .failed file), and records the
// error in result.VerifyErr. Returns the (possibly renamed) outputPath.
func handleVerifyResult(outputPath, sourceDir, mkvPath string, index *source.Index, phasePrefix string, result *createResult) string {
	err := verifyReconstructionFunc(outputPath, sourceDir, mkvPath, index, phasePrefix)
	if err == nil {
		return outputPath
	}
	printWarn(" ERROR: Verification failed: %v\n", err)
	// The config file references the pre-rename path; drop it.
	configPath := outputPath + ".yaml"
	switch rmErr := osRemove(configPath); {
	case rmErr == nil:
		printWarn(" Removed config file: %s\n", configPath)
	case !os.IsNotExist(rmErr):
		printWarn(" ERROR: Failed to remove config file %s: %v\n", configPath, rmErr)
	}
	// Move the broken file aside, replacing any earlier .failed artifact.
	failedPath := outputPath + ".failed"
	if rmErr := osRemove(failedPath); rmErr != nil && !os.IsNotExist(rmErr) {
		printWarn(" ERROR: Failed to remove existing failed file %s: %v\n", failedPath, rmErr)
	}
	if renameErr := osRename(outputPath, failedPath); renameErr != nil {
		printWarn(" ERROR: Failed to rename broken file: %v\n", renameErr)
	} else {
		printWarn(" Renamed to: %s\n", failedPath)
		outputPath = failedPath
		result.OutputPath = failedPath
	}
	result.VerifyErr = err
	return outputPath
}
// createDedup creates a .mkvdup file from an MKV and source directory.
// It runs the full six-phase pipeline (codec check, indexing, then the four
// phases of createDedupWithIndex) and prints a results summary.
// Returns a non-nil error on fatal failure or verification failure.
func createDedup(mkvPath, sourceDir, outputPath, virtualName string, warnThreshold float64, nonInteractive bool) error {
	totalStart := time.Now()
	// Default virtual name
	if virtualName == "" {
		virtualName = filepath.Base(mkvPath)
	}
	// Ensure virtual name has .mkv extension
	if !strings.HasSuffix(strings.ToLower(virtualName), ".mkv") {
		virtualName += ".mkv"
	}
	printInfoln("Creating dedup file...")
	printInfo(" MKV: %s\n", mkvPath)
	printInfo(" Source: %s\n", sourceDir)
	printInfo(" Output: %s\n", outputPath)
	printInfoln()
	// Phase 1: Quick codec compatibility check (only reads MKV track headers, not full file)
	printInfo("Phase 1/6: Checking codec compatibility...")
	codecParser, err := mkv.NewParser(mkvPath)
	if err != nil {
		return fmt.Errorf("open MKV: %w", err)
	}
	if err := codecParser.ParseTracksOnly(); err != nil {
		// Fail open: this fast-path parser can't handle all MKV layouts.
		// Log and continue without the pre-index codec compatibility check.
		log.Printf("Warning: fast MKV track parsing failed for %q: %v; continuing without pre-index codec check", mkvPath, err)
		codecParser.Close()
	} else {
		if err := checkCodecCompatibilityFromDir(codecParser.Tracks(), sourceDir, nonInteractive); err != nil {
			codecParser.Close()
			return err
		}
		codecParser.Close()
	}
	printInfoln(" done")
	// Phase 2: Index source (expensive)
	indexer, index, err := buildSourceIndex(sourceDir, "Phase 2/6: Building source index...")
	if err != nil {
		return err
	}
	defer index.Close()
	// Phase 3-6: Process MKV (re-parses MKV, but parsing is fast relative to indexing)
	result := createDedupWithIndex(mkvPath, sourceDir, outputPath, virtualName, indexer, index, 3, 6, nonInteractive, false)
	if result.Err != nil {
		return result.Err
	}
	// Summary
	printInfoln()
	printInfoln("=== Results ===")
	printInfo("Total time: %v\n", time.Since(totalStart))
	printInfoln()
	printInfo("MKV file size: %s bytes (%.2f MB)\n", formatInt(result.MkvSize), float64(result.MkvSize)/(1024*1024))
	printInfo("Matched bytes: %s bytes (%.2f MB, %.1f%%)\n",
		formatInt(result.MatchedBytes), float64(result.MatchedBytes)/(1024*1024),
		float64(result.MatchedBytes)/float64(result.MkvSize)*100)
	printInfo("Delta (unmatched): %s bytes (%.2f MB, %.1f%%)\n",
		formatInt(result.UnmatchedBytes), float64(result.UnmatchedBytes)/(1024*1024),
		float64(result.UnmatchedBytes)/float64(result.MkvSize)*100)
	printInfoln()
	printInfo("Dedup file size: %s bytes (%.2f MB)\n", formatInt(result.DedupSize), float64(result.DedupSize)/(1024*1024))
	printInfo("Space savings: %.1f%%\n", result.Savings)
	printInfoln()
	printInfo("Packets matched: %s / %s (%.1f%%)\n",
		formatInt(int64(result.MatchedPackets)), formatInt(int64(result.TotalPackets)),
		float64(result.MatchedPackets)/float64(result.TotalPackets)*100)
	printInfo("Index entries: %s\n", formatInt(int64(result.IndexEntries)))
	// Warning for low savings (suppressed in quiet mode)
	if !quiet && result.Savings < warnThreshold {
		printInfoln()
		printInfo("WARNING: Space savings (%.1f%%) below %.0f%%\n", result.Savings, warnThreshold)
		printInfoln(" This may indicate wrong source, transcoded MKV, or very small MKV file.")
	}
	// Verification failure surfaces as the command's error after the summary.
	if result.VerifyErr != nil {
		return fmt.Errorf("verification failed: %w", result.VerifyErr)
	}
	return nil
}
package main
import (
"fmt"
"time"
"github.com/stuckj/mkvdup/internal/dedup"
"github.com/stuckj/mkvdup/internal/matcher"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// parseMKV parses a single MKV file and prints a diagnostic report to stdout:
// parse time, track list (with NAL length format for video tracks), packet
// counts, and the first few packets. Used by the standalone parse command.
func parseMKV(path string) error {
	fmt.Printf("Parsing MKV file: %s\n", path)
	parser, err := mkv.NewParser(path)
	if err != nil {
		return fmt.Errorf("create parser: %w", err)
	}
	defer parser.Close()
	fmt.Printf("File size: %s bytes (%.2f GB)\n", formatInt(parser.Size()), float64(parser.Size())/(1024*1024*1024))
	start := time.Now()
	lastProgress := time.Now()
	err = parser.Parse(func(processed, total int64) {
		// Throttle progress output to twice per second.
		if time.Since(lastProgress) > 500*time.Millisecond {
			pct := float64(processed) / float64(total) * 100
			fmt.Printf("\rProgress: %.1f%% (%s / %s bytes)", pct, formatInt(processed), formatInt(total))
			lastProgress = time.Now()
		}
	})
	if err != nil {
		return fmt.Errorf("parse: %w", err)
	}
	elapsed := time.Since(start)
	fmt.Printf("\rProgress: 100.0%% - Complete          \n")
	fmt.Printf("Parse time: %v\n", elapsed)
	fmt.Println()
	fmt.Printf("Tracks: %d\n", len(parser.Tracks()))
	for _, t := range parser.Tracks() {
		typeStr := "unknown"
		switch t.Type {
		case mkv.TrackTypeVideo:
			typeStr = "video"
		case mkv.TrackTypeAudio:
			typeStr = "audio"
		case mkv.TrackTypeSubtitle:
			typeStr = "subtitle"
		}
		extra := ""
		if t.Type == mkv.TrackTypeVideo {
			// Report whether the video stream uses length-prefixed NAL units
			// (AVCC/HVCC) or Annex B start codes.
			nalSize := matcher.NALLengthSizeForTrack(t.CodecID, t.CodecPrivate)
			if nalSize > 0 {
				extra = fmt.Sprintf(", NAL length: %d bytes (AVCC/HVCC)", nalSize)
			} else {
				extra = ", Annex B"
			}
		}
		fmt.Printf(" Track %d: %s (codec: %s%s)\n", t.Number, typeStr, t.CodecID, extra)
	}
	fmt.Println()
	fmt.Printf("Total packets: %d\n", parser.PacketCount())
	fmt.Printf(" Video packets: %d\n", parser.VideoPacketCount())
	fmt.Printf(" Audio packets: %d\n", parser.AudioPacketCount())
	// Show some sample packets
	packets := parser.Packets()
	if len(packets) > 0 {
		fmt.Println()
		fmt.Println("Sample packets (first 5):")
		for i := 0; i < 5 && i < len(packets); i++ {
			p := packets[i]
			fmt.Printf(" Packet %d: offset=%d, size=%d, track=%d, keyframe=%v\n",
				i, p.Offset, p.Size, p.TrackNum, p.Keyframe)
		}
	}
	return nil
}
// indexSource indexes a source directory and prints a diagnostic report to
// stdout: source type, indexed files, unique hash count, index type, and
// total indexed locations. Used by the standalone index command.
func indexSource(dir string) error {
	fmt.Printf("Indexing source directory: %s\n", dir)
	indexer, err := source.NewIndexer(dir, source.DefaultWindowSize)
	if err != nil {
		return fmt.Errorf("create indexer: %w", err)
	}
	fmt.Printf("Source type: %s\n", indexer.SourceType())
	start := time.Now()
	lastProgress := time.Now()
	err = indexer.Build(func(processed, total int64) {
		// Throttle progress output to twice per second.
		if time.Since(lastProgress) > 500*time.Millisecond {
			pct := float64(processed) / float64(total) * 100
			fmt.Printf("\rProgress: %.1f%% (%s / %s bytes)", pct, formatInt(processed), formatInt(total))
			lastProgress = time.Now()
		}
	})
	if err != nil {
		return fmt.Errorf("build index: %w", err)
	}
	elapsed := time.Since(start)
	fmt.Printf("\rProgress: 100.0%% - Complete          \n")
	fmt.Printf("Index time: %v\n", elapsed)
	fmt.Println()
	index := indexer.Index()
	defer index.Close()
	fmt.Printf("Source files: %d\n", len(index.Files))
	for _, f := range index.Files {
		fmt.Printf(" %s: %s bytes\n", f.RelativePath, formatInt(f.Size))
	}
	fmt.Println()
	fmt.Printf("Unique hashes: %d\n", len(index.HashToLocations))
	if index.UsesESOffsets {
		// ES-aware indexes exist for MPEG-TS (Blu-ray) and MPEG-PS (DVD).
		containerType := "MPEG-PS"
		if indexer.SourceType() == source.TypeBluray {
			containerType = "MPEG-TS"
		}
		fmt.Printf("Index type: ES-aware (%s)\n", containerType)
	}
	// Count total locations
	totalLocations := 0
	for _, locs := range index.HashToLocations {
		totalLocations += len(locs)
	}
	fmt.Printf("Total indexed locations: %d\n", totalLocations)
	return nil
}
// matchMKV parses an MKV file, indexes a source directory, matches the MKV's
// packets against the source index, and prints a summary of matched/unmatched
// bytes plus an estimate of the resulting .mkvdup file size.
//
// Fix: previously, for ES-aware (Blu-ray) sources the total estimate was
// computed using the V3 footer size while the printed "Footer:" line was
// switched to the V4 footer size afterwards, so the printed breakdown did not
// sum to the printed total. The footer size is now chosen before the total is
// computed.
func matchMKV(mkvPath, sourceDir string) error {
	totalStart := time.Now()
	// Phase 1: Parse MKV
	parser, _, err := parseMKVWithProgress(mkvPath, "Phase 1/3: Parsing MKV file...")
	if err != nil {
		return err
	}
	defer parser.Close()
	// Phase 2: Index source
	_, index, err := buildSourceIndex(sourceDir, "Phase 2/3: Indexing source...")
	if err != nil {
		return err
	}
	defer index.Close()
	// Phase 3: Match packets
	fmt.Println("Phase 3/3: Matching packets...")
	m, err := matcher.NewMatcher(index)
	if err != nil {
		return fmt.Errorf("create matcher: %w", err)
	}
	defer m.Close()
	start := time.Now()
	lastProgress := time.Now()
	result, err := m.Match(mkvPath, parser.Packets(), parser.Tracks(), func(processed, total int) {
		// Throttle progress output to at most ~2 updates per second.
		if time.Since(lastProgress) > 500*time.Millisecond {
			pct := float64(processed) / float64(total) * 100
			fmt.Printf("\r Progress: %.1f%% (%d/%d packets)", pct, processed, total)
			lastProgress = time.Now()
		}
	})
	if err != nil {
		return fmt.Errorf("match: %w", err)
	}
	fmt.Printf("\r Matched in %v \n", time.Since(start))
	// Summary
	fmt.Println()
	fmt.Println("=== Results ===")
	fmt.Printf("Total time: %v\n", time.Since(totalStart))
	fmt.Println()
	mkvSize := parser.Size()
	fmt.Printf("MKV file size: %s bytes (%.2f MB)\n", formatInt(mkvSize), float64(mkvSize)/(1024*1024))
	fmt.Printf("Matched bytes: %s bytes (%.2f MB, %.1f%%)\n",
		formatInt(result.MatchedBytes), float64(result.MatchedBytes)/(1024*1024),
		float64(result.MatchedBytes)/float64(mkvSize)*100)
	fmt.Printf("Delta (unmatched): %s bytes (%.2f MB, %.1f%%)\n",
		formatInt(result.UnmatchedBytes), float64(result.UnmatchedBytes)/(1024*1024),
		float64(result.UnmatchedBytes)/float64(mkvSize)*100)
	fmt.Println()
	fmt.Printf("Packets matched: %d / %d (%.1f%%)\n",
		result.MatchedPackets, result.TotalPackets,
		float64(result.MatchedPackets)/float64(result.TotalPackets)*100)
	fmt.Printf("Index entries: %d\n", len(result.Entries))
	fmt.Println()
	// Storage savings (using actual format constants)
	indexSize := int64(len(result.Entries) * dedup.EntrySize)
	headerSize := int64(dedup.HeaderSize)
	// Choose the footer size up front so the printed breakdown sums to the
	// printed total (ES-aware sources use the larger V4 footer).
	footerSize := int64(dedup.FooterSize)
	if index.UsesESOffsets {
		footerSize = int64(dedup.FooterV4Size)
	}
	totalDedupSize := headerSize + indexSize + int64(len(result.DeltaData)) + footerSize
	// For Blu-ray sources, V4 format includes range map section (estimate)
	rangeMapNote := ""
	if index.UsesESOffsets {
		// Range map is compressed; rough estimate is ~5-10% of index size
		rangeMapEstimate := indexSize / 10
		totalDedupSize += rangeMapEstimate
		rangeMapNote = fmt.Sprintf(" + ~%s range map", formatInt(rangeMapEstimate))
	}
	savings := float64(mkvSize-totalDedupSize) / float64(mkvSize) * 100
	fmt.Printf("Estimated dedup file size:\n")
	fmt.Printf(" Header: %s bytes\n", formatInt(headerSize))
	fmt.Printf(" Index: %s bytes (%s entries × %d)\n", formatInt(indexSize), formatInt(int64(len(result.Entries))), dedup.EntrySize)
	fmt.Printf(" Delta: %s bytes\n", formatInt(int64(len(result.DeltaData))))
	fmt.Printf(" Footer: %s bytes\n", formatInt(footerSize))
	fmt.Printf(" Total: ~%s bytes (%.2f MB)%s\n", formatInt(totalDedupSize), float64(totalDedupSize)/(1024*1024), rangeMapNote)
	fmt.Printf(" Savings: ~%.1f%% reduction\n", savings)
	return nil
}
package main
import (
"encoding/binary"
"fmt"
"sort"
"strings"
"github.com/stuckj/mkvdup/internal/dedup"
"github.com/stuckj/mkvdup/internal/matcher"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/mmap"
)
// deltaClass accumulates byte count and entry count for delta classification.
// It is used as a simple tally bucket by deltadiag: one instance per stream
// class (video/audio/container), per audio codec, and per H.264 NAL type.
type deltaClass struct {
	bytes int64 // total delta bytes attributed to this class
	count int   // number of delta regions (or NALs) counted
}
// deltadiag analyzes delta (unmatched) entries in a .mkvdup file by
// cross-referencing with the original MKV to classify what stream type
// each delta region belongs to (video/audio/container).
//
// Output goes to stderr (progress via printWarn) and stdout (the report).
// Returns an error if either file cannot be opened or parsed.
func deltadiag(dedupPath, mkvPath string) error {
	// Open dedup file
	reader, err := dedup.NewReader(dedupPath, "")
	if err != nil {
		return fmt.Errorf("open dedup file: %w", err)
	}
	defer reader.Close()
	entryCount := reader.EntryCount()
	origSize := reader.OriginalSize()
	printWarn("Dedup file: %d %s, original size %s bytes (%.2f MB)\n",
		entryCount, plural(entryCount, "entry", "entries"), formatInt(origSize), float64(origSize)/(1024*1024))
	// Parse MKV to get packet boundaries
	printWarn("Parsing MKV file...\n")
	mkvParser, err := mkv.NewParser(mkvPath)
	if err != nil {
		return fmt.Errorf("create MKV parser: %w", err)
	}
	defer mkvParser.Close()
	if err := mkvParser.Parse(nil); err != nil {
		return fmt.Errorf("parse MKV: %w", err)
	}
	packets := mkvParser.Packets()
	tracks := mkvParser.Tracks()
	printWarn(" %d packets, %d tracks\n", len(packets), len(tracks))
	// Build track type map and detect AVCC NAL length size
	trackTypes := make(map[int]int)    // track number -> track type
	trackCodecs := make(map[int]string) // track number -> codec ID
	nalLenSizes := make(map[int]int)   // track number -> AVCC NAL length prefix size
	isAVCTrack := make(map[int]bool)   // track number -> true for H.264/AVC tracks
	for _, t := range tracks {
		trackTypes[int(t.Number)] = t.Type
		trackCodecs[int(t.Number)] = t.CodecID
		nalLenSizes[int(t.Number)] = matcher.NALLengthSizeForTrack(t.CodecID, t.CodecPrivate)
		if strings.HasPrefix(t.CodecID, "V_MPEG4/ISO/AVC") {
			isAVCTrack[int(t.Number)] = true
		}
	}
	// Memory-map MKV for reading delta bytes
	mkvMmap, err := mmap.Open(mkvPath)
	if err != nil {
		return fmt.Errorf("mmap MKV: %w", err)
	}
	defer mkvMmap.Close()
	mkvData := mkvMmap.Data()
	// Sort packets by offset for binary search
	sort.Slice(packets, func(i, j int) bool {
		return packets[i].Offset < packets[j].Offset
	})
	// Classify each delta entry
	printWarn("Classifying delta entries...\n")
	var deltaVideo, deltaAudio, deltaContainer deltaClass
	deltaAudioByCodec := make(map[string]*deltaClass)
	// NAL type is a 5-bit field (0-31), hence the fixed 32-slot array.
	var deltaVideoByNAL [32]deltaClass
	var deltaVideoSliceSmall, deltaVideoSliceLarge deltaClass
	for i := 0; i < entryCount; i++ {
		ent, ok := reader.GetEntry(i)
		if !ok {
			continue
		}
		if ent.Source != 0 {
			continue // Skip matched entries
		}
		entStart := ent.MkvOffset
		entEnd := entStart + ent.Length
		// Walk through the delta entry's byte range, classifying each portion
		// based on which MKV packet (if any) it overlaps. A single delta entry
		// can span multiple packets and container gaps when large unmatched
		// regions (e.g., LPCM audio) create contiguous delta runs.
		pos := entStart
		for pos < entEnd {
			pktIdx := deltadiagFindPacket(packets, pos)
			if pktIdx < 0 {
				// Not inside any packet — find the next packet start
				nextPkt := deltadiagFindNextPacket(packets, pos)
				var gapEnd int64
				if nextPkt >= 0 && packets[nextPkt].Offset < entEnd {
					gapEnd = packets[nextPkt].Offset
				} else {
					gapEnd = entEnd
				}
				// Bytes between packets are container/muxing overhead.
				gapBytes := gapEnd - pos
				deltaContainer.bytes += gapBytes
				deltaContainer.count++
				pos = gapEnd
				continue
			}
			pkt := packets[pktIdx]
			pktEnd := pkt.Offset + pkt.Size
			overlapEnd := entEnd
			if overlapEnd > pktEnd {
				overlapEnd = pktEnd
			}
			overlapBytes := overlapEnd - pos
			ttype := trackTypes[int(pkt.TrackNum)]
			if ttype == mkv.TrackTypeVideo {
				deltaVideo.bytes += overlapBytes
				deltaVideo.count++
				// Parse AVCC NALs in the delta region
				nalLenSize := nalLenSizes[int(pkt.TrackNum)]
				if nalLenSize > 0 && isAVCTrack[int(pkt.TrackNum)] && overlapBytes >= int64(nalLenSize+1) {
					deltaStart := pos
					deltaEnd := overlapEnd
					if deltaEnd <= int64(len(mkvData)) {
						deltadiagClassifyAVCC(mkvData, pkt, nalLenSize, deltaStart, deltaEnd,
							&deltaVideoByNAL, &deltaVideoSliceSmall, &deltaVideoSliceLarge)
					}
				}
			} else if ttype == mkv.TrackTypeAudio {
				deltaAudio.bytes += overlapBytes
				deltaAudio.count++
				codec := trackCodecs[int(pkt.TrackNum)]
				if codec == "" {
					codec = "unknown"
				}
				// Lazily create the per-codec tally bucket.
				dc := deltaAudioByCodec[codec]
				if dc == nil {
					dc = &deltaClass{}
					deltaAudioByCodec[codec] = dc
				}
				dc.bytes += overlapBytes
				dc.count++
			} else {
				deltaContainer.bytes += overlapBytes
				deltaContainer.count++
			}
			pos = overlapEnd
		}
	}
	// Print results
	totalDelta := deltaVideo.bytes + deltaAudio.bytes + deltaContainer.bytes
	if totalDelta == 0 {
		fmt.Printf("\nNo delta entries found (100%% matched).\n")
		return nil
	}
	fmt.Printf("\n=== Delta Classification ===\n")
	fmt.Printf("Total delta: %s bytes (%.2f MB)\n\n", formatInt(totalDelta), float64(totalDelta)/(1024*1024))
	fmt.Printf("Video delta: %12s bytes (%8.2f MB) [%6d entries] (%.1f%% of delta)\n",
		formatInt(deltaVideo.bytes), float64(deltaVideo.bytes)/(1024*1024), deltaVideo.count,
		float64(deltaVideo.bytes)/float64(totalDelta)*100)
	fmt.Printf("Audio delta: %12s bytes (%8.2f MB) [%6d entries] (%.1f%% of delta)\n",
		formatInt(deltaAudio.bytes), float64(deltaAudio.bytes)/(1024*1024), deltaAudio.count,
		float64(deltaAudio.bytes)/float64(totalDelta)*100)
	fmt.Printf("Container delta: %12s bytes (%8.2f MB) [%6d entries] (%.1f%% of delta)\n",
		formatInt(deltaContainer.bytes), float64(deltaContainer.bytes)/(1024*1024), deltaContainer.count,
		float64(deltaContainer.bytes)/float64(totalDelta)*100)
	// Audio codec breakdown
	if len(deltaAudioByCodec) > 0 {
		fmt.Printf("\n=== Audio Delta by Codec ===\n")
		// Sort codec names for deterministic output (map order is random).
		codecs := make([]string, 0, len(deltaAudioByCodec))
		for codec := range deltaAudioByCodec {
			codecs = append(codecs, codec)
		}
		sort.Strings(codecs)
		for _, codec := range codecs {
			dc := deltaAudioByCodec[codec]
			fmt.Printf(" %-20s: %10s bytes (%8.2f MB) [%6d entries]\n",
				codec, formatInt(dc.bytes), float64(dc.bytes)/(1024*1024), dc.count)
		}
	}
	// Video NAL type breakdown
	nalTypeNames := map[int]string{
		1: "non-IDR slice", 2: "slice A", 3: "slice B", 4: "slice C",
		5: "IDR slice", 6: "SEI", 7: "SPS", 8: "PPS", 9: "AUD", 12: "filler",
	}
	hasNALBreakdown := false
	for i := 0; i < 32; i++ {
		if deltaVideoByNAL[i].count > 0 {
			hasNALBreakdown = true
			break
		}
	}
	if hasNALBreakdown {
		fmt.Printf("\n=== Video Delta by H.264 NAL Type ===\n")
		for i := 0; i < 32; i++ {
			if deltaVideoByNAL[i].count == 0 {
				continue
			}
			name := nalTypeNames[i]
			if name == "" {
				name = fmt.Sprintf("type %d", i)
			}
			fmt.Printf(" %-14s: %10s bytes (%8.2f MB) [%6d NALs]\n",
				name, formatInt(deltaVideoByNAL[i].bytes),
				float64(deltaVideoByNAL[i].bytes)/(1024*1024),
				deltaVideoByNAL[i].count)
		}
		fmt.Printf("\n=== Video Slice Delta Size Breakdown ===\n")
		fmt.Printf(" Slice NALs < 4KB: %10s bytes (%8.2f MB) [%6d NALs]\n",
			formatInt(deltaVideoSliceSmall.bytes), float64(deltaVideoSliceSmall.bytes)/(1024*1024),
			deltaVideoSliceSmall.count)
		fmt.Printf(" Slice NALs >= 4KB: %10s bytes (%8.2f MB) [%6d NALs]\n",
			formatInt(deltaVideoSliceLarge.bytes), float64(deltaVideoSliceLarge.bytes)/(1024*1024),
			deltaVideoSliceLarge.count)
	}
	// Summary
	fmt.Printf("\n=== Summary ===\n")
	fmt.Printf("Original file: %.2f MB\n", float64(origSize)/(1024*1024))
	fmt.Printf("Total delta: %.2f MB (%.1f%% of original)\n",
		float64(totalDelta)/(1024*1024), float64(totalDelta)/float64(origSize)*100)
	fmt.Printf(" Video delta: %.2f MB (%.1f%% of delta)\n",
		float64(deltaVideo.bytes)/(1024*1024), float64(deltaVideo.bytes)/float64(totalDelta)*100)
	fmt.Printf(" Audio delta: %.2f MB (%.1f%% of delta)\n",
		float64(deltaAudio.bytes)/(1024*1024), float64(deltaAudio.bytes)/float64(totalDelta)*100)
	fmt.Printf(" Container: %.2f MB (%.1f%% of delta)\n",
		float64(deltaContainer.bytes)/(1024*1024), float64(deltaContainer.bytes)/float64(totalDelta)*100)
	return nil
}
// deltadiagFindPacket locates the packet whose byte range [Offset, Offset+Size)
// contains offset, using binary search over packets sorted by Offset.
// Returns the packet's index, or -1 when offset lies outside every packet.
func deltadiagFindPacket(packets []mkv.Packet, offset int64) int {
	lo, hi := 0, len(packets)-1
	for lo <= hi {
		mid := lo + (hi-lo)/2 // overflow-safe midpoint
		switch p := packets[mid]; {
		case offset < p.Offset:
			hi = mid - 1
		case offset >= p.Offset+p.Size:
			lo = mid + 1
		default:
			return mid
		}
	}
	return -1
}
// deltadiagFindNextPacket returns the index of the first packet whose Offset
// is >= offset (packets must be sorted by Offset), or -1 when no such packet
// exists. Classic lower-bound binary search.
func deltadiagFindNextPacket(packets []mkv.Packet, offset int64) int {
	lo, hi := 0, len(packets)-1
	best := -1
	for lo <= hi {
		mid := lo + (hi-lo)/2
		if packets[mid].Offset < offset {
			lo = mid + 1
		} else {
			// Candidate found; keep searching left for an earlier one.
			best = mid
			hi = mid - 1
		}
	}
	return best
}
// deltadiagClassifyAVCC parses AVCC NAL units within a packet to classify which
// NAL types fall within the delta region [deltaStart, deltaEnd).
//
// AVCC framing: each NAL is preceded by a big-endian length prefix of
// nalLenSize bytes (1, 2, or 4). For every NAL overlapping the delta region,
// the overlapping byte count is tallied into byNAL by the NAL's 5-bit type;
// slice NALs (types 1 and 5) are additionally split into small (<4KB) and
// large (>=4KB) buckets. Offsets are clamped to len(mkvData) so a truncated
// mapping cannot cause out-of-range reads.
func deltadiagClassifyAVCC(mkvData []byte, pkt mkv.Packet, nalLenSize int,
	deltaStart, deltaEnd int64,
	byNAL *[32]deltaClass, sliceSmall, sliceLarge *deltaClass) {
	pktEnd := pkt.Offset + pkt.Size
	if pktEnd > int64(len(mkvData)) {
		pktEnd = int64(len(mkvData))
	}
	pktData := mkvData[pkt.Offset:pktEnd]
	pos := 0
	// Loop invariant: at least nalLenSize+1 bytes remain, enough for a
	// length prefix plus one byte of NAL payload.
	for pos+nalLenSize < len(pktData) {
		var nalLen uint32
		switch nalLenSize {
		case 4:
			nalLen = binary.BigEndian.Uint32(pktData[pos:])
		case 2:
			nalLen = uint32(binary.BigEndian.Uint16(pktData[pos:]))
		case 1:
			nalLen = uint32(pktData[pos])
		}
		nalDataStart := pkt.Offset + int64(pos+nalLenSize)
		nalDataEnd := nalDataStart + int64(nalLen)
		// Zero-length or truncated NAL: stop parsing this packet.
		if nalLen == 0 || nalDataEnd > pktEnd {
			break
		}
		nalFullStart := pkt.Offset + int64(pos)
		// Check overlap with delta region
		overlapStart := nalFullStart
		if overlapStart < deltaStart {
			overlapStart = deltaStart
		}
		overlapEnd := nalDataEnd
		if overlapEnd > deltaEnd {
			overlapEnd = deltaEnd
		}
		if overlapStart < overlapEnd {
			overlapBytes := overlapEnd - overlapStart
			if nalDataStart < int64(len(mkvData)) {
				// NAL type is the low 5 bits of the first payload byte.
				nalType := mkvData[nalDataStart] & 0x1F
				byNAL[nalType].bytes += overlapBytes
				byNAL[nalType].count++
				if nalType == 1 || nalType == 5 {
					if nalLen >= 4096 {
						sliceLarge.bytes += overlapBytes
						sliceLarge.count++
					} else {
						sliceSmall.bytes += overlapBytes
						sliceSmall.count++
					}
				}
			}
		}
		// Advance past this NAL (prefix + payload).
		pos = int(nalDataEnd - pkt.Offset)
		if pos <= 0 {
			break
		}
	}
}
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/stuckj/mkvdup/internal/dedup"
)
// expandConfigCmd reads a config file (same format as mount/validate), resolves
// all includes glob patterns to explicit paths, and writes an expanded config
// that is semantically equivalent but with no wildcard patterns.
//
// With dryRun set, or when outputPath is empty, the expanded config is printed
// to stdout instead of written. When writing, the file is left untouched if
// its non-comment YAML content is already identical.
func expandConfigCmd(configPath string, outputPath string, dryRun bool) error {
	expanded, err := dedup.ExpandConfigFile(configPath)
	if err != nil {
		return err
	}

	// Add a generation header comment.
	absConfigPath, err := filepath.Abs(configPath)
	if err != nil {
		absConfigPath = configPath
	}
	var buf strings.Builder
	fmt.Fprintf(&buf, "# Auto-generated by: mkvdup expand-config\n")
	fmt.Fprintf(&buf, "# Source: %s\n", absConfigPath)
	fmt.Fprintf(&buf, "# Generated: %s\n", time.Now().UTC().Format(time.RFC3339))
	buf.Write(expanded)
	output := buf.String()

	// Dry-run mode and stdout mode both just print the result.
	if dryRun || outputPath == "" {
		fmt.Print(output)
		return nil
	}

	// Skip rewrite if the YAML content (non-comment lines) is unchanged,
	// to avoid triggering unnecessary mount reloads.
	if existing, readErr := os.ReadFile(outputPath); readErr == nil {
		if yamlContent(string(existing)) == yamlContent(output) {
			if !quiet {
				fmt.Fprintf(os.Stderr, "No changes to %s\n", outputPath)
			}
			return nil
		}
	}

	if err := os.WriteFile(outputPath, []byte(output), 0644); err != nil {
		return fmt.Errorf("write output file: %w", err)
	}
	absOutput, err := filepath.Abs(outputPath)
	if err != nil || absOutput == "" {
		absOutput = outputPath
	}
	if !quiet {
		fmt.Fprintf(os.Stderr, "Wrote %s\n", absOutput)
	}
	return nil
}
// yamlContent extracts the non-comment lines from a YAML string for comparison.
// This allows the header comments (timestamp, etc.) to change without triggering
// a rewrite when the actual config content is unchanged.
func yamlContent(s string) string {
	var b strings.Builder
	first := true
	for _, line := range strings.Split(s, "\n") {
		if strings.HasPrefix(line, "#") {
			continue
		}
		if !first {
			b.WriteByte('\n')
		}
		b.WriteString(line)
		first = false
	}
	return b.String()
}
package main
import (
"os"
"strconv"
)
// formatInt formats an integer with thousands separators (e.g., 1234567 → "1,234,567").
//
// Fix: the sign is stripped before grouping. Previously the '-' character was
// counted as a digit, so negative values with 1-3 digits were mangled
// (e.g., -100 → "-,100").
func formatInt(n int64) string {
	s := strconv.FormatInt(n, 10)
	sign := ""
	if s[0] == '-' {
		sign, s = "-", s[1:]
	}
	if len(s) <= 3 {
		return sign + s
	}
	// Insert commas from the right, every three digits.
	var result []byte
	for i := 0; i < len(s); i++ {
		if i > 0 && (len(s)-i)%3 == 0 {
			result = append(result, ',')
		}
		result = append(result, s[i])
	}
	return sign + string(result)
}
// plural chooses between the singular and plural form of a word based on n:
// the singular form only when n is exactly 1, the plural form otherwise
// (including 0 and negatives). Example: plural(n, "file", "files").
func plural(n int, singular, pl string) string {
	if n != 1 {
		return pl
	}
	return singular
}
// isTerminal reports whether stdin is attached to a terminal (a character
// device) rather than a pipe or file. A Stat failure is treated as
// "not a terminal".
func isTerminal() bool {
	info, err := os.Stdin.Stat()
	return err == nil && info.Mode()&os.ModeCharDevice != 0
}
package main
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
"github.com/cespare/xxhash/v2"
"github.com/stuckj/mkvdup/internal/dedup"
)
// showInfo displays information about a dedup file: format metadata, sizes,
// checksums, and the list of referenced source files. With hideUnused set,
// source files flagged as unused are omitted from the listing (only when the
// file format carries used-flags).
func showInfo(dedupPath string, hideUnused bool) error {
	reader, err := dedup.NewReader(dedupPath, "")
	if err != nil {
		return fmt.Errorf("open dedup file: %w", err)
	}
	defer reader.Close()

	info := reader.Info()
	fmt.Printf("Dedup file: %s\n", dedupPath)
	fmt.Println()

	if creator := info["creator_version"].(string); creator != "" {
		fmt.Printf("Created by: %s\n", creator)
	} else {
		// Files written before 0.9.0 did not record a creator version.
		fmt.Printf("Created by: unknown (pre-0.9.0)\n")
	}
	fmt.Printf("Format version: %d\n", info["version"].(uint32))
	origSize := info["original_size"].(int64)
	fmt.Printf("Original MKV size: %s bytes (%.2f MB)\n", formatInt(origSize), float64(origSize)/(1024*1024))
	fmt.Printf("Original checksum: %016x\n", info["original_checksum"].(uint64))
	fmt.Println()

	var sourceType string
	switch info["source_type"].(uint8) {
	case 0:
		sourceType = "DVD"
	case 1:
		sourceType = "Blu-ray"
	default:
		sourceType = "Unknown"
	}
	fmt.Printf("Source type: %s\n", sourceType)
	fmt.Printf("Uses ES offsets: %v\n", info["uses_es_offsets"].(bool))
	if info["has_range_maps"].(bool) {
		fmt.Printf("Has range maps: true\n")
	}
	fmt.Printf("Source file count: %d\n", info["source_file_count"].(int))
	fmt.Printf("Index entry count: %d\n", info["entry_count"].(int))
	deltaSize := info["delta_size"].(int64)
	fmt.Printf("Delta size: %s bytes (%.2f MB)\n", formatInt(deltaSize), float64(deltaSize)/(1024*1024))
	fmt.Println()

	// Source files
	fmt.Println("Source files:")
	hasUsedFlags := reader.HasSourceUsedFlags()
	for _, sf := range reader.SourceFiles() {
		unused := hasUsedFlags && !sf.Used
		if hideUnused && unused {
			continue
		}
		suffix := ""
		if unused {
			suffix = " (unused)"
		}
		fmt.Printf(" %s (%s bytes)%s\n", sf.RelativePath, formatInt(sf.Size), suffix)
	}
	return nil
}
// calculateFileChecksum calculates xxhash checksum of a file.
// It is a convenience wrapper over calculateFileChecksumWithProgress with
// progress display disabled (expectedSize 0 suppresses progress output).
func calculateFileChecksum(path string) (uint64, error) {
	return calculateFileChecksumWithProgress(path, 0, "")
}
// calculateFileChecksumWithProgress calculates xxhash checksum of a file,
// showing inline progress when expectedSize > 0.
//
// When expectedSize <= 0 the file is hashed silently via io.Copy. Otherwise
// the file is read in 4MB chunks with a "\r"-refreshed progress line printed
// to stdout at most ~every 500ms, then cleared when hashing completes.
// displayName is used only in the progress line text.
func calculateFileChecksumWithProgress(path string, expectedSize int64, displayName string) (uint64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()
	hasher := xxhash.New()
	showProgress := expectedSize > 0
	if !showProgress {
		// No progress wanted: stream the whole file through the hasher.
		if _, err := io.Copy(hasher, f); err != nil {
			return 0, err
		}
		return hasher.Sum64(), nil
	}
	buf := make([]byte, 4*1024*1024) // 4MB buffer
	var processed int64
	// Zero time forces the first progress line to print immediately.
	lastProgress := time.Time{}
	for {
		n, err := f.Read(buf)
		if n > 0 {
			// Per the io.Reader contract, bytes read must be consumed
			// before inspecting err (n > 0 can accompany a non-nil error).
			if _, werr := hasher.Write(buf[:n]); werr != nil {
				return 0, werr
			}
			processed += int64(n)
			if time.Since(lastProgress) > 500*time.Millisecond {
				pct := float64(processed) / float64(expectedSize) * 100
				fmt.Printf("\r Verifying %s... %.1f%%", displayName, pct)
				lastProgress = time.Now()
			}
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return 0, err
		}
	}
	// Clear progress line
	progressText := fmt.Sprintf(" Verifying %s... 100.0%%", displayName)
	fmt.Printf("\r%s\r", strings.Repeat(" ", len(progressText)))
	return hasher.Sum64(), nil
}
// checkDedup checks the integrity of a dedup file and its source files.
// It runs up to three phases: (1) verify the dedup container itself,
// (2) confirm each referenced source file exists with the expected size,
// and (3) when sourceChecksums is set and phase 2 passed, re-hash every
// source file and compare checksums. Returns an error summarizing the
// number of failures, or nil when everything checks out.
func checkDedup(dedupPath, sourceDir string, sourceChecksums bool) error {
	fmt.Printf("Checking dedup file: %s\n", dedupPath)
	fmt.Printf("Source directory: %s\n", sourceDir)
	fmt.Println()

	// Phase 1: Open and verify dedup file integrity
	reader, err := dedup.NewReader(dedupPath, sourceDir)
	if err != nil {
		return fmt.Errorf("open dedup file: %w", err)
	}
	defer reader.Close()
	fmt.Print("Checking dedup file integrity...")
	if err := reader.VerifyIntegrity(); err != nil {
		fmt.Println(" FAILED")
		return fmt.Errorf("integrity check: %w", err)
	}
	fmt.Println(" OK")

	// Phase 2: Check source files exist with correct sizes
	sourceFiles := reader.SourceFiles()
	fmt.Printf("\nChecking source files (%d %s)...\n", len(sourceFiles), plural(len(sourceFiles), "file", "files"))
	failures := 0
	for _, sf := range sourceFiles {
		path := filepath.Join(sourceDir, sf.RelativePath)
		st, statErr := os.Stat(path)
		switch {
		case statErr != nil:
			fmt.Printf(" FAILED %s: %v\n", sf.RelativePath, statErr)
			failures++
		case st.Size() != sf.Size:
			fmt.Printf(" FAILED %s: size mismatch (expected %s, got %s)\n",
				sf.RelativePath, formatInt(sf.Size), formatInt(st.Size()))
			failures++
		default:
			fmt.Printf(" OK %s (%s bytes)\n", sf.RelativePath, formatInt(sf.Size))
		}
	}

	// Phase 3: Optionally verify source file checksums
	if sourceChecksums {
		if failures > 0 {
			fmt.Println("\nSkipping source checksum verification due to earlier errors")
		} else {
			fmt.Printf("\nVerifying source file checksums...\n")
			for _, sf := range sourceFiles {
				path := filepath.Join(sourceDir, sf.RelativePath)
				sum, hashErr := calculateFileChecksumWithProgress(path, sf.Size, sf.RelativePath)
				switch {
				case hashErr != nil:
					fmt.Printf(" FAILED %s: %v\n", sf.RelativePath, hashErr)
					failures++
				case sum != sf.Checksum:
					fmt.Printf(" FAILED %s: checksum mismatch (expected %016x, got %016x)\n",
						sf.RelativePath, sf.Checksum, sum)
					failures++
				default:
					fmt.Printf(" OK %s\n", sf.RelativePath)
				}
			}
		}
	}

	// Final summary
	fmt.Println()
	if failures > 0 {
		return fmt.Errorf("check FAILED: %d %s found", failures, plural(failures, "error", "errors"))
	}
	fmt.Println("Check PASSED")
	return nil
}
package main
import (
"fmt"
"log"
"log/syslog"
"os"
"os/signal"
"path/filepath"
"sync"
"syscall"
"github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/stuckj/mkvdup/internal/daemon"
"github.com/stuckj/mkvdup/internal/dedup"
mkvfuse "github.com/stuckj/mkvdup/internal/fuse"
)
// defaultConfigPath is the default config file location, used by mountFuse
// when no config paths are supplied on the command line.
const defaultConfigPath = "/etc/mkvdup.conf"
// expandConfigDir expands a directory path to the list of .yaml/.yml files it
// directly contains (non-recursive; the extension match is case-sensitive).
// Entries are returned in os.ReadDir's sorted filename order. An error is
// returned if the directory cannot be read or holds no YAML files.
func expandConfigDir(dir string) ([]string, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, fmt.Errorf("read config directory %s: %w", dir, err)
	}
	var files []string
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		switch filepath.Ext(e.Name()) {
		case ".yaml", ".yml":
			files = append(files, filepath.Join(dir, e.Name()))
		}
	}
	if len(files) == 0 {
		return nil, fmt.Errorf("no YAML files (.yaml, .yml) found in %s", dir)
	}
	return files, nil
}
// mountFuse mounts a FUSE filesystem exposing dedup files as MKV files.
//
// Lifecycle: daemonize (unless foreground), resolve configs, load the
// permission store, build and mount the filesystem, start source/config
// watchers, install signal handlers (SIGHUP = reload, SIGINT/SIGTERM =
// unmount), then block in server.Wait() until unmounted.
func mountFuse(mountpoint string, configPaths []string, opts MountOptions) error {
	// Daemonize unless --foreground is set or we're already a daemon child
	if !opts.Foreground && !daemon.IsChild() {
		return daemon.Daemonize(opts.PidFile, opts.DaemonTimeout)
	}
	// Write PID file in foreground mode (daemon mode writes it in Daemonize)
	if opts.Foreground && opts.PidFile != "" {
		if err := daemon.WritePidFile(opts.PidFile, os.Getpid()); err != nil {
			return fmt.Errorf("write pid file: %w", err)
		}
	}
	// Clean up PID file on exit (for both foreground and daemon child modes)
	if opts.PidFile != "" && (opts.Foreground || daemon.IsChild()) {
		defer func() {
			// Best-effort removal; the mount is already down at this point.
			_ = daemon.RemovePidFile(opts.PidFile)
		}()
	}
	// If no config paths provided, use default
	if len(configPaths) == 0 {
		if _, err := os.Stat(defaultConfigPath); err == nil {
			configPaths = []string{defaultConfigPath}
		} else {
			if daemon.IsChild() {
				// Tell the waiting parent process why startup failed.
				daemon.NotifyError(fmt.Errorf("no config files specified and %s not found", defaultConfigPath))
			}
			return fmt.Errorf("no config files specified and %s not found", defaultConfigPath)
		}
	}
	// Store the config-dir path for SIGHUP re-expansion
	var configDirPath string
	if opts.ConfigDir {
		configDirPath = configPaths[0]
	}
	// If configDir is set, expand directory to list of .yaml files
	if opts.ConfigDir {
		if len(configPaths) != 1 {
			err := fmt.Errorf("--config-dir requires exactly one directory path, got %d", len(configPaths))
			if daemon.IsChild() {
				daemon.NotifyError(err)
			}
			return err
		}
		expanded, err := expandConfigDir(configPaths[0])
		if err != nil {
			if daemon.IsChild() {
				daemon.NotifyError(err)
			}
			return err
		}
		configPaths = expanded
	}
	// Set up permission store
	defaults := mkvfuse.Defaults{
		FileUID:  opts.DefaultUID,
		FileGID:  opts.DefaultGID,
		FileMode: opts.DefaultFileMode,
		DirUID:   opts.DefaultUID,
		DirGID:   opts.DefaultGID,
		DirMode:  opts.DefaultDirMode,
	}
	permPath := mkvfuse.ResolvePermissionsPath(opts.PermissionsFile)
	permStore := mkvfuse.NewPermissionStore(permPath, defaults, verbose)
	if err := permStore.Load(); err != nil {
		if daemon.IsChild() {
			daemon.NotifyError(fmt.Errorf("load permissions: %w", err))
		}
		return fmt.Errorf("load permissions: %w", err)
	}
	// Resolve configs (expands includes, globs, virtual_files) and extract
	// on_error_command (first-wins across all config files).
	configs, errorCmdConfig, loadedConfigPaths, err := dedup.ResolveConfigs(configPaths)
	if err != nil {
		err = fmt.Errorf("resolve configs: %w", err)
		if daemon.IsChild() {
			daemon.NotifyError(err)
		}
		return err
	}
	opts.OnErrorCommand = errorCmdConfig
	// Create the root filesystem
	root, err := mkvfuse.NewMKVFSFromConfigs(configs, verbose, &mkvfuse.DefaultReaderFactory{ReadTimeout: opts.SourceReadTimeout}, permStore)
	if err != nil {
		err = fmt.Errorf("create filesystem: %w", err)
		if daemon.IsChild() {
			daemon.NotifyError(err)
		}
		return err
	}
	// Mount the filesystem
	fuseOpts := &fs.Options{
		MountOptions: fuse.MountOptions{
			AllowOther: opts.AllowOther,
			Name:       "mkvdup",
			FsName:     "mkvdup",
			MaxWrite:   1 << 20, // 1MB max read/write; go-fuse sets max_read = MaxWrite
			// Enable kernel permission checks for standard Unix semantics.
			// This properly handles supplementary groups and matches behavior
			// of real filesystems (ext4, XFS, btrfs, etc.).
			Options: []string{"default_permissions"},
		},
	}
	server, err := fs.Mount(mountpoint, root, fuseOpts)
	if err != nil {
		err = fmt.Errorf("mount: %w", err)
		if daemon.IsChild() {
			daemon.NotifyError(err)
		}
		return err
	}
	// Wait for mount to be ready
	server.WaitMount()
	// Enable FUSE kernel notifications (NotifyDelete, NotifyEntry, etc.)
	// now that the go-fuse bridge is initialized.
	root.SetMounted()
	// In daemon mode, redirect log output to syslog before starting watchers
	// so that all log.Printf calls (from watchers, doReload, BuildDirectoryTree)
	// go to syslog. Must happen before daemon.Detach() which redirects stderr
	// to /dev/null.
	if daemon.IsChild() {
		if w, err := syslog.New(syslog.LOG_INFO|syslog.LOG_DAEMON, "mkvdup"); err == nil {
			log.SetOutput(w)
			log.SetFlags(0) // syslog adds its own timestamp
			defer w.Close()
		}
	}
	// Set up source file watcher (monitors source files for changes)
	var sourceWatcher *mkvfuse.SourceWatcher
	if !opts.NoSourceWatch {
		// Closure over log.Printf: syslog setup above redirects the default
		// logger's output, so the watcher automatically picks it up.
		watchLogFn := func(format string, args ...interface{}) {
			log.Printf(format, args...)
		}
		var err error
		sourceWatcher, err = mkvfuse.NewSourceWatcher(opts.OnSourceChange, opts.SourceWatchPollInterval, opts.OnErrorCommand, watchLogFn)
		if err != nil {
			// Non-fatal: the mount works without the watcher.
			log.Printf("source-watch: warning: failed to create watcher: %v", err)
		} else {
			sourceWatcher.Update(root.Files(), &mkvfuse.DefaultReaderFactory{ReadTimeout: opts.SourceReadTimeout})
			sourceWatcher.Start()
		}
	}
	// Declare configWatcher before doReload so the closure can reference it.
	// Initialized below after doReload is defined.
	var configWatcher *mkvfuse.ConfigWatcher
	// doReload performs a config reload. Called by the SIGHUP handler and
	// the config file watcher callback. Serialized by reloadMu to prevent
	// concurrent reloads from racing on root.Reload() and watcher updates.
	// Uses log.Printf which is redirected to syslog in daemon mode (see
	// log.SetOutput above).
	var reloadMu sync.Mutex
	doReload := func() {
		reloadMu.Lock()
		defer reloadMu.Unlock()
		log.Printf("reloading config...")
		// Re-expand config-dir if applicable
		var reloadPaths []string
		if configDirPath != "" {
			expanded, err := expandConfigDir(configDirPath)
			if err != nil {
				log.Printf("reload failed: expand config dir: %v", err)
				return
			}
			reloadPaths = expanded
		} else {
			reloadPaths = configPaths
		}
		// Resolve configs (expands includes, globs, virtual_files)
		configs, _, newConfigPaths, err := dedup.ResolveConfigs(reloadPaths)
		if err != nil {
			log.Printf("reload failed: resolve configs: %v", err)
			return
		}
		// Reload the filesystem
		if err := root.Reload(configs, func(format string, args ...interface{}) {
			log.Printf(format, args...)
		}); err != nil {
			log.Printf("reload failed: %v", err)
			return
		}
		// Update source watcher with new file set
		if sourceWatcher != nil {
			sourceWatcher.Update(root.Files(), &mkvfuse.DefaultReaderFactory{ReadTimeout: opts.SourceReadTimeout})
		}
		// Update config watcher with new config file set
		if configWatcher != nil {
			configWatcher.Update(newConfigPaths)
		}
		log.Printf("config reloaded successfully")
	}
	// Set up config file watcher (monitors config files for changes)
	if !opts.NoConfigWatch {
		watchLogFn := func(format string, args ...interface{}) {
			log.Printf(format, args...)
		}
		var err error
		configWatcher, err = mkvfuse.NewConfigWatcher(opts.OnConfigChange, opts.SourceWatchPollInterval, doReload, watchLogFn)
		if err != nil {
			// Non-fatal: the mount works without config watching.
			log.Printf("config-watch: warning: failed to create watcher: %v", err)
		} else {
			configWatcher.Update(loadedConfigPaths)
			configWatcher.Start()
		}
	}
	// If we're a daemon child, signal success and detach from terminal
	if daemon.IsChild() {
		if err := daemon.NotifyReady(); err != nil {
			// Parent may have timed out; log and continue since mount succeeded
			printWarn("warning: failed to notify parent: %v\n", err)
		}
		daemon.Detach()
	} else {
		// Running in foreground mode - print info
		fmt.Printf("Mounted at %s\n", mountpoint)
		fmt.Printf("Files:\n")
		for _, configPath := range configPaths {
			// Read errors are ignored here; this listing is informational only.
			config, _ := dedup.ReadConfig(configPath)
			if config != nil {
				fmt.Printf(" %s\n", config.Name)
			}
		}
		fmt.Println()
		fmt.Println("Press Ctrl+C to unmount")
	}
	// Handle signals for graceful shutdown and config reload
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)
	go func() {
		for sig := range sigChan {
			switch sig {
			case syscall.SIGHUP:
				doReload()
			case syscall.SIGINT, syscall.SIGTERM:
				if !daemon.IsChild() {
					fmt.Println("\nUnmounting...")
				}
				server.Unmount()
				return
			}
		}
	}()
	// Serve until unmounted
	server.Wait()
	// Stop watchers
	if configWatcher != nil {
		configWatcher.Stop()
	}
	if sourceWatcher != nil {
		sourceWatcher.Stop()
	}
	if !daemon.IsChild() {
		fmt.Println("Unmounted")
	}
	return nil
}
// reloadDaemon validates config files and sends SIGHUP to the running daemon.
// If configPaths is non-empty the configuration is validated first, and the
// reload signal is only sent when validation passes.
func reloadDaemon(pid int, configPaths []string, configDir bool) error {
	// Verify the process exists (on Unix, FindProcess always succeeds;
	// send signal 0 to check if process is actually running)
	proc, err := os.FindProcess(pid)
	if err != nil {
		return fmt.Errorf("find process %d: %w", pid, err)
	}
	if err := proc.Signal(syscall.Signal(0)); err != nil {
		return fmt.Errorf("daemon process %d is not running: %w", pid, err)
	}

	// Validate config if paths provided
	if len(configPaths) > 0 {
		resolved, resolveErr := resolveConfigPaths(configPaths, configDir)
		if resolveErr != nil {
			return fmt.Errorf("resolve config paths: %w", resolveErr)
		}
		fmt.Println("Validating configuration...")
		allEntries, _, hasErrors := validateConfigEntries(resolved)
		nameErrors, _ := checkNameConflicts(allEntries)
		if hasErrors || nameErrors {
			return fmt.Errorf("config validation failed, not sending reload signal")
		}
		fmt.Println("Configuration valid.")
		fmt.Println()
	}

	// Send SIGHUP to the daemon
	fmt.Printf("Sending SIGHUP to daemon (pid %d)...\n", pid)
	if err := proc.Signal(syscall.SIGHUP); err != nil {
		return fmt.Errorf("send SIGHUP to process %d: %w", pid, err)
	}
	fmt.Println("Reload signal sent successfully.")
	return nil
}
// resolveConfigPaths expands --config-dir and applies defaults to get the final
// list of config file paths to validate. With configDir set, exactly one
// directory must be given and it is expanded to its YAML files; otherwise the
// paths are returned as-is (erroring when none were provided).
func resolveConfigPaths(configPaths []string, configDir bool) ([]string, error) {
	switch {
	case configDir:
		if len(configPaths) != 1 {
			return nil, fmt.Errorf("--config-dir requires exactly one directory path, got %d", len(configPaths))
		}
		return expandConfigDir(configPaths[0])
	case len(configPaths) == 0:
		return nil, fmt.Errorf("no config files specified\nRun 'mkvdup validate --help' for usage")
	default:
		return configPaths, nil
	}
}
package main
import (
"fmt"
"os"
"path/filepath"
"sort"
"github.com/stuckj/mkvdup/internal/matcher"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// ProbeResult represents the result of probing a source against an MKV.
type ProbeResult struct {
	MKVPath      string  // MKV file that was probed
	SourcePath   string  // source directory checked against
	MatchCount   int     // probe hashes found in the source index
	TotalSamples int     // total probe hashes computed from the MKV
	MatchPercent float64 // MatchCount / TotalSamples * 100
}
// mkvProbeData holds the pre-computed probe hashes for a single MKV file.
type mkvProbeData struct {
	Path        string              // path to the MKV file
	HashCount   int                 // len(ProbeHashes), cached for reporting
	ProbeHashes []matcher.ProbeHash // sampled hashes used for matching
	Error       string              // non-empty if MKV could not be parsed
}
// probe tests if one or more MKV files match one or more source directories.
// When multiple MKVs are provided, each source is indexed only once and all
// MKV hash sets are checked against it, making multi-MKV probing much faster.
//
// Returns nil even when individual MKVs or sources fail; per-item errors are
// printed and reflected in the results table instead.
func probe(mkvPaths []string, sourceDirs []string) error {
	fmt.Printf("Probing %d MKV(s) against %d source(s)...\n", len(mkvPaths), len(sourceDirs))
	fmt.Println()
	windowSize := source.DefaultWindowSize

	// Phase 1: Parse all MKVs and compute probe hashes.
	mkvData := make([]mkvProbeData, 0, len(mkvPaths))
	for i, mkvPath := range mkvPaths {
		if len(mkvPaths) > 1 {
			fmt.Printf("[%d/%d] Parsing %s...\n", i+1, len(mkvPaths), filepath.Base(mkvPath))
		} else {
			fmt.Printf("Parsing %s...\n", filepath.Base(mkvPath))
		}
		hashes, err := computeProbeHashes(mkvPath, windowSize)
		if err != nil {
			// Record the failure so later phases can report it alongside
			// the successful MKVs.
			fmt.Printf(" Error: %v\n", err)
			mkvData = append(mkvData, mkvProbeData{
				Path:  mkvPath,
				Error: err.Error(),
			})
			continue
		}
		fmt.Printf(" Computed %d probe hashes\n", len(hashes))
		mkvData = append(mkvData, mkvProbeData{
			Path:        mkvPath,
			HashCount:   len(hashes),
			ProbeHashes: hashes,
		})
	}
	fmt.Println()

	// Phase 2: For each source, index once and check all MKV hash sets.
	// results[mkvIdx] = []ProbeResult for that MKV.
	results := make([][]ProbeResult, len(mkvData))
	for i := range results {
		results[i] = make([]ProbeResult, 0, len(sourceDirs))
	}
	// recordIndexFailure appends a zero-match result for every parseable MKV
	// when a source could not be indexed. This loop previously appeared
	// verbatim in both the NewIndexer and Build error paths.
	recordIndexFailure := func(sourceDir string) {
		for i, md := range mkvData {
			if md.Error != "" {
				continue
			}
			results[i] = append(results[i], ProbeResult{
				MKVPath:      md.Path,
				SourcePath:   sourceDir,
				TotalSamples: md.HashCount,
			})
		}
	}
	for _, sourceDir := range sourceDirs {
		fmt.Printf("Indexing source: %s...\n", sourceDir)
		indexer, err := source.NewIndexer(sourceDir, windowSize)
		if err != nil {
			fmt.Printf(" Error: %v\n", err)
			recordIndexFailure(sourceDir)
			continue
		}
		indexer.SetVerboseWriter(verboseWriter())
		if err := indexer.Build(nil); err != nil {
			fmt.Printf(" Error building index: %v\n", err)
			recordIndexFailure(sourceDir)
			continue
		}
		index := indexer.Index()
		// Check each MKV's hashes against this source.
		for i, md := range mkvData {
			if md.Error != "" {
				continue
			}
			matchCount := 0
			for _, ph := range md.ProbeHashes {
				if locs, ok := index.HashToLocations[ph.Hash]; ok {
					if index.UsesESOffsets {
						// With ES offsets, only count a hit whose stream
						// kind (video vs non-video) matches the probe's.
						for _, loc := range locs {
							if loc.IsVideo == ph.IsVideo {
								matchCount++
								break
							}
						}
					} else if len(locs) > 0 {
						matchCount++
					}
				}
			}
			matchPercent := float64(matchCount) / float64(md.HashCount) * 100
			results[i] = append(results[i], ProbeResult{
				MKVPath:      md.Path,
				SourcePath:   sourceDir,
				MatchCount:   matchCount,
				TotalSamples: md.HashCount,
				MatchPercent: matchPercent,
			})
			if len(mkvPaths) > 1 {
				fmt.Printf(" %s: %d/%d (%.0f%%)\n",
					filepath.Base(md.Path), matchCount, md.HashCount, matchPercent)
			} else {
				fmt.Printf(" Matched %d/%d hashes (%.0f%%)\n",
					matchCount, md.HashCount, matchPercent)
			}
		}
		index.Close()
	}

	// Phase 3: Print results, best match first per MKV.
	fmt.Println()
	fmt.Println("=== Results ===")
	for i, md := range mkvData {
		if md.Error != "" {
			fmt.Printf("\n %s: ERROR: %s\n", filepath.Base(md.Path), md.Error)
			continue
		}
		if len(mkvPaths) > 1 {
			fmt.Printf("\n %s:\n", filepath.Base(md.Path))
		} else {
			fmt.Println()
		}
		// Sort this MKV's results by match percentage.
		sort.Slice(results[i], func(a, b int) bool {
			return results[i][a].MatchPercent > results[i][b].MatchPercent
		})
		for _, r := range results[i] {
			indicator := ""
			if r.MatchPercent >= 80 {
				indicator = " ← likely match"
			} else if r.MatchPercent >= 40 {
				indicator = " ← possible match"
			}
			// The single- and multi-MKV branches here printed the exact
			// same line; collapsed into one Printf.
			fmt.Printf(" %s %d/%d matches (%.0f%%)%s\n",
				r.SourcePath, r.MatchCount, r.TotalSamples, r.MatchPercent, indicator)
		}
	}
	fmt.Println()
	fmt.Println("Interpretation:")
	fmt.Println(" 80-100%: Very likely the correct source")
	fmt.Println(" 40-80%: Possible match (may be partial content)")
	fmt.Println(" <40%: Unlikely to be the source")
	return nil
}
// computeProbeHashes parses an MKV and returns its probe hashes.
// It samples a spread of packets across the file, reads up to 4KiB of each
// directly from disk, and extracts one probe hash per sampled packet.
func computeProbeHashes(mkvPath string, windowSize int) ([]matcher.ProbeHash, error) {
	parser, _, err := parseMKVWithProgress(mkvPath, "")
	if err != nil {
		return nil, err
	}
	defer parser.Close()
	packets := parser.Packets()
	if len(packets) == 0 {
		return nil, fmt.Errorf("no packets found in MKV")
	}
	// Per-track metadata needed to interpret packet payloads.
	isVideoTrack := make(map[int]bool)
	nalSizeByTrack := make(map[int]int)
	for _, t := range parser.Tracks() {
		isVideoTrack[int(t.Number)] = t.Type == mkv.TrackTypeVideo
		nalSizeByTrack[int(t.Number)] = matcher.NALLengthSizeForTrack(t.CodecID, t.CodecPrivate)
	}
	samples := samplePackets(packets, 20)
	mkvFile, err := os.Open(mkvPath)
	if err != nil {
		return nil, fmt.Errorf("open MKV: %w", err)
	}
	defer mkvFile.Close()
	var probeHashes []matcher.ProbeHash
	for _, pkt := range samples {
		// Read at most 4KiB of the packet; skip packets smaller than one
		// hash window, since no hash could be computed from them.
		readSize := pkt.Size
		if readSize > 4096 {
			readSize = 4096
		}
		if readSize < int64(windowSize) {
			continue
		}
		buf := make([]byte, readSize)
		n, err := mkvFile.ReadAt(buf, pkt.Offset)
		if err != nil || n < windowSize {
			continue
		}
		track := int(pkt.TrackNum)
		hashes := matcher.ExtractProbeHashes(buf[:n], isVideoTrack[track], windowSize, nalSizeByTrack[track])
		if len(hashes) > 0 {
			// Keep only the first hash from each sampled packet.
			probeHashes = append(probeHashes, hashes[0])
		}
	}
	if len(probeHashes) == 0 {
		return nil, fmt.Errorf("no valid hashes computed from sampled packets")
	}
	return probeHashes, nil
}
// samplePackets selects N packets distributed across the file:
// - 25% from first 10% of packets (early content)
// - 50% from middle 80% of packets (main content)
// - 25% from last 10% of packets (late content)
// If the file has N packets or fewer, all packets are returned as-is.
func samplePackets(packets []mkv.Packet, n int) []mkv.Packet {
	if len(packets) <= n {
		return packets
	}
	// Distribution of the n samples across the three regions.
	earlyCount := n / 4
	lateCount := n / 4
	midCount := n - earlyCount - lateCount
	// Region boundaries, clamped so each region is non-degenerate.
	earlyEnd := len(packets) / 10
	if earlyEnd < 1 {
		earlyEnd = 1
	}
	lateStart := len(packets) - len(packets)/10
	if lateStart <= earlyEnd {
		lateStart = earlyEnd + 1
	}
	out := make([]mkv.Packet, 0, n)
	// take appends evenly-stepped packets from [lo, hi) until the running
	// sample count reaches limit. It replaces three near-identical loops.
	take := func(lo, hi, count, limit int) {
		if count <= 0 || hi <= lo {
			return
		}
		step := (hi - lo) / count
		if step < 1 {
			step = 1
		}
		for i := lo; i < hi && len(out) < limit; i += step {
			out = append(out, packets[i])
		}
	}
	take(0, earlyEnd, earlyCount, earlyCount)                       // first 10%
	take(earlyEnd, lateStart, midCount, earlyCount+midCount)        // middle 80%
	take(lateStart, len(packets), lateCount, n)                     // last 10%
	return out
}
package main
import (
"errors"
"fmt"
"io"
"os"
"path/filepath"
"syscall"
"gopkg.in/yaml.v3"
)
// relocateDedup moves an .mkvdup file and its .mkvdup.yaml sidecar to a new
// location, recalculating relative paths in the sidecar so they resolve to
// the same absolute locations from the new position.
//
// force allows overwriting an existing destination (and removes an orphaned
// destination sidecar when the source has none); dryRun prints what would
// happen — including the rewritten sidecar contents — without touching disk.
func relocateDedup(src, dst string, force, dryRun bool) error {
	// Resolve source to absolute path
	absSrc, err := filepath.Abs(src)
	if err != nil {
		return fmt.Errorf("resolve source path: %w", err)
	}
	// Verify source .mkvdup file exists
	srcInfo, err := os.Stat(absSrc)
	if err != nil {
		return fmt.Errorf("source file: %w", err)
	}
	if srcInfo.IsDir() {
		return fmt.Errorf("source %s is a directory, expected an .mkvdup file", absSrc)
	}
	// Determine sidecar path; a missing sidecar is allowed, any other Stat
	// error is not.
	sidecarSrc := absSrc + ".yaml"
	hasSidecar := true
	if _, err := os.Stat(sidecarSrc); os.IsNotExist(err) {
		hasSidecar = false
	} else if err != nil {
		return fmt.Errorf("check sidecar: %w", err)
	}
	// Resolve destination
	absDst, err := filepath.Abs(dst)
	if err != nil {
		return fmt.Errorf("resolve destination path: %w", err)
	}
	// If destination is an existing directory, or an explicitly-directory path
	// (e.g. ends with a path separator, like "/new/location/"), move into it
	// with the same filename. The raw dst is inspected for the trailing
	// separator because filepath.Abs cleans it away.
	dstInfo, err := os.Stat(absDst)
	isDirDst := false
	if err == nil && dstInfo.IsDir() {
		isDirDst = true
	} else if os.IsNotExist(err) && len(dst) > 0 && os.IsPathSeparator(dst[len(dst)-1]) {
		isDirDst = true
	} else if err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("check destination: %w", err)
	}
	if isDirDst {
		absDst = filepath.Join(absDst, filepath.Base(absSrc))
	}
	// Don't relocate to the same path
	if absSrc == absDst {
		return fmt.Errorf("source and destination are the same: %s", absSrc)
	}
	sidecarDst := absDst + ".yaml"
	// Check destination doesn't already exist (unless --force)
	if !force {
		if _, err := os.Stat(absDst); err == nil {
			return fmt.Errorf("destination %s already exists (use --force to overwrite)", absDst)
		} else if !os.IsNotExist(err) {
			return fmt.Errorf("check destination %s: %w", absDst, err)
		}
		// Always check for existing destination sidecar, even if source has none,
		// to avoid leaving stale/mismatched sidecars.
		if _, err := os.Stat(sidecarDst); err == nil {
			return fmt.Errorf("destination sidecar %s already exists (use --force to overwrite)", sidecarDst)
		} else if !os.IsNotExist(err) {
			return fmt.Errorf("check destination sidecar %s: %w", sidecarDst, err)
		}
	}
	// Read and update sidecar if it exists, preserving all YAML keys/comments
	// by editing the yaml.Node tree instead of round-tripping through a struct.
	var updatedSidecar []byte
	if hasSidecar {
		sidecarData, err := os.ReadFile(sidecarSrc)
		if err != nil {
			return fmt.Errorf("read sidecar: %w", err)
		}
		var doc yaml.Node
		if err := yaml.Unmarshal(sidecarData, &doc); err != nil {
			return fmt.Errorf("parse sidecar %s: %w", sidecarSrc, err)
		}
		if doc.Kind != yaml.DocumentNode || len(doc.Content) == 0 {
			return fmt.Errorf("sidecar %s: unexpected YAML structure", sidecarSrc)
		}
		root := doc.Content[0]
		if root.Kind != yaml.MappingNode {
			return fmt.Errorf("sidecar %s: expected YAML mapping, got %v", sidecarSrc, root.Kind)
		}
		// Extract current values for dedup_file and source_dir
		oldDedupFile := yamlNodeValue(root, "dedup_file")
		oldSourceDir := yamlNodeValue(root, "source_dir")
		if oldDedupFile == "" || oldSourceDir == "" {
			return fmt.Errorf("sidecar %s: missing required dedup_file or source_dir", sidecarSrc)
		}
		srcDir := filepath.Dir(absSrc)
		dstDir := filepath.Dir(absDst)
		// dedup_file should point to the new location (since the .mkvdup file
		// itself is being moved). Use the basename for relative paths (sidecar
		// and dedup file are always in the same directory), or the new absolute
		// path if the original was absolute.
		var newDedupFile string
		if filepath.IsAbs(oldDedupFile) {
			newDedupFile = absDst
		} else {
			newDedupFile = filepath.Base(absDst)
		}
		// source_dir points to a static location — recalculate relative to new position
		newSourceDir, err := recalcRelativePath(srcDir, dstDir, oldSourceDir)
		if err != nil {
			return fmt.Errorf("recalculate source_dir path: %w", err)
		}
		// Validate that source_dir is still reachable from the new location
		absSourceDir := resolveRelPath(dstDir, newSourceDir)
		sdInfo, err := os.Stat(absSourceDir)
		if err != nil {
			return fmt.Errorf("source directory not reachable from new location: %s → %s: %w", newSourceDir, absSourceDir, err)
		}
		if !sdInfo.IsDir() {
			return fmt.Errorf("source_dir is not a directory from new location: %s → %s", newSourceDir, absSourceDir)
		}
		// Update values in the YAML node tree (preserves all other keys/comments)
		setYAMLNodeValue(root, "dedup_file", newDedupFile)
		setYAMLNodeValue(root, "source_dir", newSourceDir)
		// Recalculate relative paths in virtual_files entries
		if err := recalcVirtualFiles(root, srcDir, dstDir); err != nil {
			return fmt.Errorf("recalculate virtual_files paths: %w", err)
		}
		// Recalculate relative include patterns
		recalcIncludes(root, srcDir, dstDir)
		updatedSidecar, err = yaml.Marshal(&doc)
		if err != nil {
			return fmt.Errorf("marshal updated sidecar: %w", err)
		}
	}
	// Dry run: print what would happen and return (all validation above has
	// already run, so a dry run exercises the same failure modes).
	if dryRun {
		printInfo("Would move:\n")
		printInfo(" %s → %s\n", absSrc, absDst)
		if hasSidecar {
			printInfo(" %s → %s\n", sidecarSrc, sidecarDst)
			printInfo("\nUpdated sidecar would contain:\n")
			printInfo("%s", string(updatedSidecar))
		}
		return nil
	}
	// Ensure destination directory exists
	dstDir := filepath.Dir(absDst)
	if err := os.MkdirAll(dstDir, 0755); err != nil {
		return fmt.Errorf("create destination directory: %w", err)
	}
	// Move the .mkvdup file (supports cross-filesystem moves)
	if err := moveFile(absSrc, absDst); err != nil {
		return fmt.Errorf("move dedup file: %w", err)
	}
	// With --force and no source sidecar, clean up any orphaned destination
	// sidecar now that the dedup move has succeeded.
	if force && !hasSidecar {
		if _, err := os.Stat(sidecarDst); err == nil {
			if err := osRemove(sidecarDst); err != nil {
				printWarn("Warning: could not remove orphaned sidecar %s: %v\n", sidecarDst, err)
			}
		}
	}
	// Write updated sidecar atomically, then remove old one.
	// If sidecar write fails, rollback the dedup move.
	if hasSidecar {
		if err := writeFileAtomic(sidecarDst, updatedSidecar, 0644); err != nil {
			if rbErr := moveFile(absDst, absSrc); rbErr != nil {
				printWarn("Warning: failed to rollback dedup move: %v\n", rbErr)
			}
			return fmt.Errorf("write sidecar: %w", err)
		}
		if sidecarSrc != sidecarDst {
			if err := osRemove(sidecarSrc); err != nil && !os.IsNotExist(err) {
				printWarn("Warning: could not remove old sidecar %s: %v\n", sidecarSrc, err)
			}
		}
	}
	printInfo("Moved:\n")
	printInfo(" %s → %s\n", absSrc, absDst)
	if hasSidecar {
		printInfo(" %s → %s\n", sidecarSrc, sidecarDst)
	}
	return nil
}
// recalcRelativePath takes a path (which may be relative to oldBase or absolute),
// resolves it to absolute, and returns it relative to newBase. If the original
// path was absolute, it is returned unchanged.
func recalcRelativePath(oldBase, newBase, path string) (string, error) {
	// Absolute paths resolve identically from anywhere — leave them alone.
	if filepath.IsAbs(path) {
		return path, nil
	}
	// Anchor at the old base, then re-express from the new base.
	abs := filepath.Clean(filepath.Join(oldBase, path))
	rel, err := filepath.Rel(newBase, abs)
	if err != nil {
		return "", fmt.Errorf("make relative to %s: %w", newBase, err)
	}
	return rel, nil
}
// resolveRelPath resolves a path relative to baseDir. If already absolute, returns as-is.
func resolveRelPath(baseDir, path string) string {
	if !filepath.IsAbs(path) {
		path = filepath.Clean(filepath.Join(baseDir, path))
	}
	return path
}
// writeFileAtomic writes data to dst via a temp file + rename, ensuring
// no partially written file is left at dst on failure. The temp file is
// cleaned up automatically on any error.
func writeFileAtomic(dst string, data []byte, perm os.FileMode) error {
	tmp, err := os.CreateTemp(filepath.Dir(dst), ".mkvdup-relocate-*.tmp")
	if err != nil {
		return err
	}
	name := tmp.Name()
	committed := false
	defer func() {
		// Remove the temp file on every path except a successful rename.
		if !committed {
			_ = osRemove(name)
		}
	}()
	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		return err
	}
	if err := tmp.Close(); err != nil {
		return err
	}
	// CreateTemp uses restrictive permissions; apply the requested mode
	// before the file becomes visible at dst.
	if err := os.Chmod(name, perm); err != nil {
		return err
	}
	if err := osRename(name, dst); err != nil {
		return err
	}
	committed = true
	return nil
}
// moveFile moves a file from src to dst. It tries os.Rename first for
// efficiency; if that fails with EXDEV (cross-device), it falls back to
// copy + remove.
func moveFile(src, dst string) error {
	switch err := osRename(src, dst); {
	case err == nil:
		return nil
	case !errors.Is(err, syscall.EXDEV):
		// Any rename failure other than cross-device is fatal.
		return err
	}
	// Cross-filesystem: copy then remove source.
	if err := copyFile(src, dst); err != nil {
		return fmt.Errorf("cross-device copy: %w", err)
	}
	if err := osRemove(src); err != nil {
		return fmt.Errorf("remove source after cross-device copy: %w", err)
	}
	return nil
}
// copyFile copies a file from src to dst, preserving permissions.
// A partially written dst is removed if the copy or final close fails.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()
	info, err := in.Stat()
	if err != nil {
		return err
	}
	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, info.Mode())
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close()
		_ = osRemove(dst)
		return err
	}
	// Close errors matter here: buffered data may only hit disk on Close.
	if err := out.Close(); err != nil {
		_ = osRemove(dst)
		return err
	}
	return nil
}
// yamlNodeValue returns the string value for a key in a YAML mapping node.
// Returns "" if the key is not found.
func yamlNodeValue(mapping *yaml.Node, key string) string {
	if node := yamlNodeByKey(mapping, key); node != nil {
		return node.Value
	}
	return ""
}
// setYAMLNodeValue sets the string value for a key in a YAML mapping node.
// Does nothing if the key is not present.
func setYAMLNodeValue(mapping *yaml.Node, key, value string) {
	if node := yamlNodeByKey(mapping, key); node != nil {
		node.Value = value
	}
}
// recalcVirtualFiles recalculates relative dedup_file and source_dir paths
// in virtual_files entries (a YAML sequence of mappings). Both keys point at
// static locations that are not being moved, so their relative form must be
// recomputed from the sidecar's new directory.
func recalcVirtualFiles(root *yaml.Node, srcDir, dstDir string) error {
	seq := yamlNodeByKey(root, "virtual_files")
	if seq == nil || seq.Kind != yaml.SequenceNode {
		return nil
	}
	for i, entry := range seq.Content {
		if entry.Kind != yaml.MappingNode {
			continue
		}
		// Both keys get the identical treatment; loop instead of two
		// copy-pasted blocks.
		for _, key := range []string{"dedup_file", "source_dir"} {
			old := yamlNodeValue(entry, key)
			if old == "" {
				continue
			}
			recalced, err := recalcRelativePath(srcDir, dstDir, old)
			if err != nil {
				return fmt.Errorf("virtual_files[%d].%s: %w", i, key, err)
			}
			setYAMLNodeValue(entry, key, recalced)
		}
	}
	return nil
}
// recalcIncludes recalculates relative include glob patterns in the sidecar.
// Absolute patterns and recalculation failures are left untouched.
func recalcIncludes(root *yaml.Node, srcDir, dstDir string) {
	seq := yamlNodeByKey(root, "includes")
	if seq == nil || seq.Kind != yaml.SequenceNode {
		return
	}
	for _, entry := range seq.Content {
		if entry.Kind != yaml.ScalarNode || filepath.IsAbs(entry.Value) {
			continue
		}
		// Glob patterns may contain wildcards, but only the directory prefix
		// needs adjusting; filepath.Rel handles non-existent paths fine.
		if recalced, err := recalcRelativePath(srcDir, dstDir, entry.Value); err == nil {
			entry.Value = recalced
		}
	}
}
// yamlNodeByKey returns the value node for a key in a YAML mapping node.
// Returns nil if the key is not found.
func yamlNodeByKey(mapping *yaml.Node, key string) *yaml.Node {
	// Mapping content alternates key node, value node, key node, value node...
	content := mapping.Content
	for i := 0; i+1 < len(content); i += 2 {
		if content[i].Value == key {
			return content[i+1]
		}
	}
	return nil
}
package main
import (
"fmt"
"os"
"github.com/stuckj/mkvdup/internal/dedup"
)
// fileStats holds statistics for a single dedup file.
type fileStats struct {
	name        string // virtual file name from the config
	dedupFile   string // path to the .mkvdup file
	sourceDir   string // configured source directory
	origSize    int64  // original (pre-dedup) size in bytes
	dedupSize   int64  // on-disk size of the dedup file in bytes
	sourceType  string // "DVD", "Blu-ray", or "Unknown"
	sourceFiles int    // number of source files referenced by the dedup file
	entryCount  int    // number of index entries in the dedup file
	err         error  // non-nil if stats collection failed for this file
}
// showStats displays space savings and file statistics for mkvdup-managed files.
// Per-config and per-file failures are warned about and skipped rather than
// aborting the whole run.
func showStats(configPaths []string, configDir bool) error {
	resolved, err := resolveConfigPaths(configPaths, configDir)
	if err != nil {
		return err
	}
	// Resolve each config independently so a single bad config doesn't
	// abort the entire stats run.
	var configs []dedup.Config
	for _, cfgPath := range resolved {
		loaded, _, _, loadErr := dedup.ResolveConfigs([]string{cfgPath})
		if loadErr != nil {
			printWarn("Failed to load config %s: %v\n", cfgPath, loadErr)
			continue
		}
		configs = append(configs, loaded...)
	}
	if len(configs) == 0 {
		printInfoln("No files found.")
		return nil
	}
	stats := make([]fileStats, 0, len(configs))
	for _, cfg := range configs {
		fs := collectFileStats(cfg)
		stats = append(stats, fs)
		if fs.err != nil {
			printWarn("%s\n Error: %v\n\n", fs.name, fs.err)
			continue
		}
		printFileStats(fs)
	}
	printRollupStats(stats)
	return nil
}
// collectFileStats gathers statistics for a single dedup file from its config.
//
// The reader's Info() map is produced elsewhere, so every type assertion on
// it is checked: previously a bare .(int64)/.(int)/.(uint8) here would panic
// on an unexpected value and take down the whole stats run; now a malformed
// map is reported through fs.err like any other per-file failure.
func collectFileStats(cfg dedup.Config) fileStats {
	fs := fileStats{
		name:      cfg.Name,
		dedupFile: cfg.DedupFile,
		sourceDir: cfg.SourceDir,
	}
	reader, err := dedup.NewReaderLazy(cfg.DedupFile, cfg.SourceDir)
	if err != nil {
		fs.err = fmt.Errorf("open dedup file: %w", err)
		return fs
	}
	defer reader.Close()
	info := reader.Info()
	if errMsg, ok := info["error"]; ok {
		fs.err = fmt.Errorf("read dedup file: %v", errMsg)
		return fs
	}
	var okSize, okFiles, okEntries, okType bool
	fs.origSize, okSize = info["original_size"].(int64)
	fs.sourceFiles, okFiles = info["source_file_count"].(int)
	fs.entryCount, okEntries = info["entry_count"].(int)
	sourceType, okType := info["source_type"].(uint8)
	if !okSize || !okFiles || !okEntries || !okType {
		fs.err = fmt.Errorf("read dedup file: unexpected info format")
		return fs
	}
	switch sourceType {
	case 0:
		fs.sourceType = "DVD"
	case 1:
		fs.sourceType = "Blu-ray"
	default:
		fs.sourceType = "Unknown"
	}
	dedupInfo, err := os.Stat(cfg.DedupFile)
	if err != nil {
		fs.err = fmt.Errorf("stat dedup file: %w", err)
		return fs
	}
	fs.dedupSize = dedupInfo.Size()
	return fs
}
// printFileStats prints per-file statistics.
func printFileStats(fs fileStats) {
	saved := fs.origSize - fs.dedupSize
	// Guard against division by zero for an empty original.
	var pct float64
	if fs.origSize > 0 {
		pct = float64(saved) / float64(fs.origSize) * 100
	}
	printInfo("%s\n", fs.name)
	printInfo(" Original size: %s bytes (%s)\n", formatInt(fs.origSize), formatSize(fs.origSize))
	printInfo(" Dedup file size: %s bytes (%s)\n", formatInt(fs.dedupSize), formatSize(fs.dedupSize))
	printInfo(" Space savings: %s bytes (%.2f%%)\n", formatInt(saved), pct)
	printInfo(" Source type: %s\n", fs.sourceType)
	printInfo(" Source directory: %s\n", fs.sourceDir)
	printInfo(" Source files: %d\n", fs.sourceFiles)
	printInfo(" Index entries: %s\n", formatInt(int64(fs.entryCount)))
	printInfoln()
}
// printRollupStats prints aggregate statistics across all successful files.
// Nothing is printed unless at least two files succeeded, since a rollup of
// one file would just repeat its per-file output.
func printRollupStats(stats []fileStats) {
	var origTotal, dedupTotal int64
	succeeded := 0
	sources := make(map[string]struct{})
	for _, fs := range stats {
		if fs.err != nil {
			continue
		}
		succeeded++
		origTotal += fs.origSize
		dedupTotal += fs.dedupSize
		sources[fs.sourceDir] = struct{}{}
	}
	if succeeded < 2 {
		return
	}
	var pct float64
	if origTotal > 0 {
		pct = float64(origTotal-dedupTotal) / float64(origTotal) * 100
	}
	printInfo("Totals (%d %s):\n", succeeded, plural(succeeded, "file", "files"))
	printInfo(" Original size: %s bytes (%s)\n", formatInt(origTotal), formatSize(origTotal))
	printInfo(" Dedup file size: %s bytes (%s)\n", formatInt(dedupTotal), formatSize(dedupTotal))
	printInfo(" Space savings: %s bytes (%.2f%%)\n", formatInt(origTotal-dedupTotal), pct)
	printInfo(" Unique sources: %d\n", len(sources))
}
package main
import (
"fmt"
"os"
"path"
"path/filepath"
"slices"
"strings"
"github.com/stuckj/mkvdup/internal/dedup"
)
// validationEntry tracks the result of validating a single resolved config entry.
type validationEntry struct {
	name       string // virtual file name
	status     string // "OK", "WARN", "ERR"
	message    string // detail message (empty for OK)
	configFile string // which input config file this came from
	dedupFile  string // resolved dedup file path
}
// validateConfigEntries resolves and validates each config file: YAML parsing,
// path existence checks, and dedup file header validation. Returns the
// validation entries, the successfully-parsed configs, and whether any errors
// were found.
func validateConfigEntries(configPaths []string) ([]validationEntry, []dedup.Config, bool) {
	var allEntries []validationEntry
	var allConfigs []dedup.Config
	hasErrors := false
	// failEntry records and prints a per-entry validation failure. The five
	// checks below previously repeated this five-line block verbatim.
	failEntry := func(entry validationEntry, format string, args ...any) {
		entry.status = "ERR"
		entry.message = fmt.Sprintf(format, args...)
		fmt.Printf(" ERR %s: %s\n", entry.name, entry.message)
		allEntries = append(allEntries, entry)
		hasErrors = true
	}
	for _, configPath := range configPaths {
		fmt.Printf("Validating %s...\n", filepath.Base(configPath))
		configs, _, _, err := dedup.ResolveConfigs([]string{configPath})
		if err != nil {
			// Whole-config failure: record one entry for the file itself.
			fmt.Printf(" ERR %s\n", err)
			allEntries = append(allEntries, validationEntry{
				name:       filepath.Base(configPath),
				status:     "ERR",
				message:    err.Error(),
				configFile: configPath,
			})
			hasErrors = true
			continue
		}
		if len(configs) == 0 {
			fmt.Printf(" (no entries)\n")
			continue
		}
		for _, cfg := range configs {
			entry := validationEntry{
				name:       cfg.Name,
				status:     "OK",
				configFile: configPath,
				dedupFile:  cfg.DedupFile,
			}
			// Check dedup file exists and is a regular file
			dedupStat, err := os.Stat(cfg.DedupFile)
			if err != nil {
				failEntry(entry, "dedup file: %v", err)
				continue
			}
			if dedupStat.IsDir() {
				failEntry(entry, "dedup file is a directory: %s", cfg.DedupFile)
				continue
			}
			// Check source dir exists and is a directory
			sourceStat, err := os.Stat(cfg.SourceDir)
			if err != nil {
				failEntry(entry, "source directory: %v", err)
				continue
			}
			if !sourceStat.IsDir() {
				failEntry(entry, "source path is not a directory: %s", cfg.SourceDir)
				continue
			}
			// Validate dedup file header
			reader, err := dedup.NewReaderLazy(cfg.DedupFile, cfg.SourceDir)
			if err != nil {
				failEntry(entry, "invalid dedup file: %v", err)
				continue
			}
			reader.Close()
			allEntries = append(allEntries, entry)
			allConfigs = append(allConfigs, cfg)
		}
	}
	return allEntries, allConfigs, hasErrors
}
// checkNameConflicts validates virtual file paths and detects duplicate names
// and file/directory conflicts across all entries. Updates entry statuses
// in-place and returns whether any errors or warnings were found.
//
// First occurrence of a name wins: later duplicates get a WARN pointing back
// at the config file that registered the name first.
func checkNameConflicts(entries []validationEntry) (hasErrors, hasWarnings bool) {
	nameToConfig := make(map[string]string)   // clean path -> config file
	dirComponents := make(map[string]string)  // paths used as directories -> config file
	fileComponents := make(map[string]string) // paths used as files -> config file
	for i, entry := range entries {
		// Entries that already failed basic validation are skipped entirely.
		if entry.status == "ERR" {
			continue
		}
		name := entry.name
		// Check for ".." path components
		if slices.Contains(strings.Split(name, "/"), "..") {
			entries[i].status = "ERR"
			entries[i].message = "invalid path: contains '..' component"
			fmt.Printf(" ERR %s: %s\n", name, entries[i].message)
			hasErrors = true
			continue
		}
		// Clean and validate the path (same logic as tree.go insertFile)
		cleanPath := cleanVirtualPath(name)
		if cleanPath == "" {
			entries[i].status = "ERR"
			entries[i].message = "invalid path: empty after cleaning"
			fmt.Printf(" ERR %s: %s\n", name, entries[i].message)
			hasErrors = true
			continue
		}
		// Check for duplicate names
		if prevConfig, exists := nameToConfig[cleanPath]; exists {
			entries[i].status = "WARN"
			entries[i].message = fmt.Sprintf("duplicate name (also in %s)", filepath.Base(prevConfig))
			fmt.Printf(" WARN %s: %s\n", name, entries[i].message)
			hasWarnings = true
			continue
		}
		nameToConfig[cleanPath] = entry.configFile
		// Check for file/directory conflicts
		parts := strings.Split(cleanPath, "/")
		conflictFound := false
		// Check if any prefix of this path is used as a file.
		// On the first conflict we break without recording the remaining
		// prefixes as directories.
		for j := 0; j < len(parts)-1; j++ {
			dirPath := strings.Join(parts[:j+1], "/")
			if prevConfig, exists := fileComponents[dirPath]; exists {
				entries[i].status = "WARN"
				entries[i].message = fmt.Sprintf("path component %q conflicts with file in %s", dirPath, filepath.Base(prevConfig))
				fmt.Printf(" WARN %s: %s\n", name, entries[i].message)
				hasWarnings = true
				conflictFound = true
				break
			}
			// Record as directory component (first-seen config wins)
			if _, exists := dirComponents[dirPath]; !exists {
				dirComponents[dirPath] = entry.configFile
			}
		}
		if conflictFound {
			continue
		}
		// Check if this file name conflicts with a directory
		if prevConfig, exists := dirComponents[cleanPath]; exists {
			entries[i].status = "WARN"
			entries[i].message = fmt.Sprintf("conflicts with directory from %s", filepath.Base(prevConfig))
			fmt.Printf(" WARN %s: %s\n", name, entries[i].message)
			hasWarnings = true
			continue
		}
		fileComponents[cleanPath] = entry.configFile
		// Print OK for entries that passed all checks
		if entries[i].status == "OK" {
			fmt.Printf(" OK %s\n", name)
		}
	}
	return hasErrors, hasWarnings
}
// runDeepValidation performs integrity verification on dedup files that passed
// basic validation. Returns whether any errors were found.
func runDeepValidation(entries []validationEntry, configs []dedup.Config) bool {
	fmt.Println()
	fmt.Println("Running deep validation...")
	// passedBasic reports whether cfg has a matching entry that did not
	// fail basic validation.
	passedBasic := func(cfg dedup.Config) bool {
		for _, e := range entries {
			if e.name == cfg.Name && e.dedupFile == cfg.DedupFile && e.status != "ERR" {
				return true
			}
		}
		return false
	}
	hasErrors := false
	for _, cfg := range configs {
		if !passedBasic(cfg) {
			continue
		}
		reader, err := dedup.NewReader(cfg.DedupFile, cfg.SourceDir)
		if err != nil {
			fmt.Printf(" ERR %s: failed to open: %v\n", cfg.Name, err)
			hasErrors = true
			continue
		}
		if err := reader.VerifyIntegrity(); err != nil {
			fmt.Printf(" ERR %s: integrity check failed: %v\n", cfg.Name, err)
			reader.Close()
			hasErrors = true
			continue
		}
		reader.Close()
		fmt.Printf(" OK %s: checksums valid\n", cfg.Name)
	}
	return hasErrors
}
// validateConfigs validates configuration files and returns an exit code.
// Returns 0 if all configs are valid (warnings OK without strict), 1 otherwise.
func validateConfigs(configPaths []string, configDir, deep, strict bool) int {
resolved, err := resolveConfigPaths(configPaths, configDir)
if err != nil {
printWarn("Error: %v\n", err)
return 1
}
allEntries, allConfigs, hasErrors := validateConfigEntries(resolved)
nameErrors, hasWarnings := checkNameConflicts(allEntries)
hasErrors = hasErrors || nameErrors
if deep {
hasErrors = hasErrors || runDeepValidation(allEntries, allConfigs)
}
// Print summary
var okCount, warnCount, errCount int
for _, e := range allEntries {
switch e.status {
case "OK":
okCount++
case "WARN":
warnCount++
case "ERR":
errCount++
}
}
fmt.Println()
fmt.Printf("Summary: %d %s, %d valid, %d %s, %d %s\n",
len(allEntries), plural(len(allEntries), "entry", "entries"),
okCount,
warnCount, plural(warnCount, "warning", "warnings"),
errCount, plural(errCount, "error", "errors"))
if hasErrors {
return 1
}
if strict && hasWarnings {
return 1
}
return 0
}
// cleanVirtualPath normalizes a virtual file path, matching the logic in
// internal/fuse/tree.go insertFile(). Returns empty string if the path is invalid.
func cleanVirtualPath(name string) string {
// Clean the path using path.Clean (not filepath.Clean) to match
// internal/fuse/tree.go insertFile() which uses forward-slash paths.
cleaned := path.Clean(name)
// Split and filter
parts := strings.Split(cleaned, "/")
var valid []string
for _, p := range parts {
if p != "" && p != "." {
valid = append(valid, p)
}
}
if len(valid) == 0 {
return ""
}
return strings.Join(valid, "/")
}
package main
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"github.com/stuckj/mkvdup/internal/dedup"
"github.com/stuckj/mkvdup/internal/source"
)
// verifyReconstructionFunc is the function used for post-create verification.
// It is a package-level seam so tests can override it to simulate
// verification failures; production code always points it at verifyReconstruction.
var verifyReconstructionFunc = verifyReconstruction
// verifyReconstruction verifies that the dedup file can reconstruct the original MKV.
// The reconstructed stream is compared against originalPath chunk by chunk; the
// first size or byte mismatch is reported with its offset. After the original
// reaches EOF, the total byte count is checked against the reconstruction size
// so a reconstruction longer than the original is also detected.
// If phasePrefix is non-empty, a progress bar is shown.
// The index parameter is not used by this implementation; it is part of the
// signature shared with verifyReconstructionFunc overrides.
func verifyReconstruction(dedupPath, sourceDir, originalPath string, index *source.Index, phasePrefix string) error {
	reader, err := dedup.NewReader(dedupPath, sourceDir)
	if err != nil {
		return fmt.Errorf("open dedup file: %w", err)
	}
	defer reader.Close()
	if err := reader.LoadSourceFiles(); err != nil {
		return fmt.Errorf("load source files: %w", err)
	}
	// Open original MKV
	original, err := os.Open(originalPath)
	if err != nil {
		return fmt.Errorf("open original: %w", err)
	}
	defer original.Close()
	// Debug: show first few bytes comparison (controlled by verboseWriter; may be enabled via -v/--verbose or --log-verbose + --log-file)
	if vw := verboseWriter(); vw != nil {
		origFirst := make([]byte, 32)
		reconFirst := make([]byte, 32)
		n, _ := original.ReadAt(origFirst, 0)
		fmt.Fprintf(vw, " Debug: Original ReadAt(32, 0) returned %d bytes\n", n)
		n, _ = reader.ReadAt(reconFirst, 0)
		fmt.Fprintf(vw, " Debug: Reader ReadAt(32, 0) returned %d bytes\n", n)
		fmt.Fprintf(vw, " Debug: Original first 32 bytes: %x\n", origFirst)
		fmt.Fprintf(vw, " Debug: Reconstructed first 32 bytes: %x\n", reconFirst)
		// ReadAt does not move the file position, but reset defensively so the
		// comparison loop below always starts at byte 0.
		original.Seek(0, io.SeekStart)
	}
	totalSize := reader.OriginalSize()
	var bar *progressBar
	if phasePrefix != "" {
		bar = newProgressBar(phasePrefix, totalSize, "bytes")
		defer bar.Cancel() // clean up if we return early on error
	}
	// Compare chunk by chunk
	const chunkSize = 1024 * 1024 // 1MB
	originalBuf := make([]byte, chunkSize)
	reconstructedBuf := make([]byte, chunkSize)
	var offset int64
	for {
		n1, err1 := original.Read(originalBuf)
		if n1 == 0 && err1 == io.EOF {
			break
		}
		// ReadAt either fills reconstructedBuf[:n1] or returns a non-nil error,
		// so n1 != n2 below reliably signals a length mismatch.
		n2, err2 := reader.ReadAt(reconstructedBuf[:n1], offset)
		if vw := verboseWriter(); vw != nil && offset == 0 {
			fmt.Fprintf(vw, " Debug: Loop first read - n1=%d, n2=%d, err1=%v, err2=%v\n", n1, n2, err1, err2)
			fmt.Fprintf(vw, " Debug: originalBuf first 32: %x\n", originalBuf[:32])
			fmt.Fprintf(vw, " Debug: reconstructedBuf first 32: %x\n", reconstructedBuf[:32])
		}
		if n1 != n2 {
			return fmt.Errorf("size mismatch at offset %d: original=%d, reconstructed=%d", offset, n1, n2)
		}
		if !bytes.Equal(originalBuf[:n1], reconstructedBuf[:n2]) {
			// Find first mismatch
			for i := 0; i < n1; i++ {
				if originalBuf[i] != reconstructedBuf[i] {
					return fmt.Errorf("data mismatch at offset %d (orig: %02x, recon: %02x)",
						offset+int64(i), originalBuf[i], reconstructedBuf[i])
				}
			}
		}
		offset += int64(n1)
		if bar != nil {
			bar.Update(offset)
		}
		if err1 != nil && err1 != io.EOF {
			return fmt.Errorf("read original at %d: %w", offset, err1)
		}
		if err2 != nil && err2 != io.EOF {
			return fmt.Errorf("read reconstructed at %d: %w", offset, err2)
		}
	}
	// The loop is driven by the original file's EOF. If the reconstruction is
	// larger than the original, the bytes past the original's end were never
	// compared; catch that case explicitly instead of passing silently.
	if offset != totalSize {
		return fmt.Errorf("size mismatch: original is %d bytes, reconstruction is %d bytes", offset, totalSize)
	}
	if bar != nil {
		bar.Finish()
	}
	return nil
}
// openDedupReader opens a dedup file with its source directory, verifies
// integrity, loads source files, and checks source file sizes. This is the
// shared preamble for verify, extract, and similar commands.
// On any failure the reader is closed before the error is returned; on
// success the caller owns the reader and must Close it.
func openDedupReader(dedupPath, sourceDir string) (*dedup.Reader, error) {
	r, err := dedup.NewReader(dedupPath, sourceDir)
	if err != nil {
		return nil, fmt.Errorf("open dedup file: %w", err)
	}
	printInfo("Verifying dedup file checksums...")
	if err := r.VerifyIntegrity(); err != nil {
		printInfoln(" FAILED")
		r.Close()
		return nil, fmt.Errorf("integrity check: %w", err)
	}
	printInfoln(" OK")
	if err := r.LoadSourceFiles(); err != nil {
		r.Close()
		return nil, fmt.Errorf("load source files: %w", err)
	}
	printInfo("Verifying source files...")
	for _, sf := range r.SourceFiles() {
		// Cheap existence/size check only; full checksum verification is a
		// separate, slower operation.
		fullPath := filepath.Join(sourceDir, sf.RelativePath)
		info, statErr := os.Stat(fullPath)
		if statErr != nil {
			printInfoln(" FAILED")
			r.Close()
			return nil, fmt.Errorf("source file %s: %w", sf.RelativePath, statErr)
		}
		if info.Size() != sf.Size {
			printInfoln(" FAILED")
			r.Close()
			return nil, fmt.Errorf("source file %s size mismatch: expected %d, got %d",
				sf.RelativePath, sf.Size, info.Size())
		}
	}
	printInfoln(" OK")
	return r, nil
}
// verifyDedup verifies a dedup file against the original MKV.
// The original's size is compared with the reconstruction size first, then the
// two streams are compared chunk by chunk. Returns nil when verification
// passes; otherwise a descriptive error with the mismatch offset.
func verifyDedup(dedupPath, sourceDir, originalPath string) error {
	printInfo("Verifying dedup file: %s\n", dedupPath)
	printInfo("Source directory: %s\n", sourceDir)
	printInfo("Original MKV: %s\n", originalPath)
	printInfoln()
	reader, err := openDedupReader(dedupPath, sourceDir)
	if err != nil {
		return err
	}
	defer reader.Close()
	// Verify reconstruction matches original
	original, err := os.Open(originalPath)
	if err != nil {
		return fmt.Errorf("open original: %w", err)
	}
	defer original.Close()
	totalSize := reader.OriginalSize()
	// Fail fast on a size difference. This also catches an original with
	// trailing bytes beyond totalSize, which the loop below (driven by
	// totalSize) would otherwise never read or compare.
	if fi, statErr := original.Stat(); statErr == nil && fi.Size() != totalSize {
		return fmt.Errorf("size mismatch: original is %d bytes, reconstruction is %d bytes", fi.Size(), totalSize)
	}
	bar := newProgressBar("Verifying reconstruction...", totalSize, "bytes")
	defer bar.Cancel() // clean up if we return early on error
	const chunkSize = 4 * 1024 * 1024
	originalBuf := make([]byte, chunkSize)
	reconstructedBuf := make([]byte, chunkSize)
	var offset int64
	for offset < totalSize {
		remaining := totalSize - offset
		readSize := int64(chunkSize)
		if readSize > remaining {
			readSize = remaining
		}
		// io.ReadFull avoids spurious "size mismatch" errors: a plain Read is
		// allowed to return fewer bytes than requested even mid-file, while
		// ReadAt always fills the buffer or returns an error.
		n1, err1 := io.ReadFull(original, originalBuf[:readSize])
		n2, err2 := reader.ReadAt(reconstructedBuf[:readSize], offset)
		if n1 != n2 {
			return fmt.Errorf("size mismatch at offset %d", offset)
		}
		if !bytes.Equal(originalBuf[:n1], reconstructedBuf[:n2]) {
			// Locate the first differing byte for the error message.
			for i := 0; i < n1; i++ {
				if originalBuf[i] != reconstructedBuf[i] {
					return fmt.Errorf("data mismatch at offset %d", offset+int64(i))
				}
			}
		}
		// ErrUnexpectedEOF (short final read) is handled via the n1/n2
		// comparison above, not treated as an I/O failure.
		if err1 != nil && err1 != io.EOF && err1 != io.ErrUnexpectedEOF {
			return fmt.Errorf("read original: %w", err1)
		}
		if err2 != nil && err2 != io.EOF {
			return fmt.Errorf("read reconstructed: %w", err2)
		}
		offset += int64(n1)
		bar.Update(offset)
	}
	bar.Finish()
	printInfoln()
	printInfoln("Verification PASSED")
	return nil
}
// extractDedup rebuilds the original MKV from a dedup file and source.
// On any failure the partially written output file is removed; on success
// the fully reconstructed MKV is left at outputPath.
func extractDedup(dedupPath, sourceDir, outputPath string) (retErr error) {
	printInfo("Dedup file: %s\n", dedupPath)
	printInfo("Source directory: %s\n", sourceDir)
	printInfo("Output MKV: %s\n", outputPath)
	printInfoln()
	rdr, err := openDedupReader(dedupPath, sourceDir)
	if err != nil {
		return err
	}
	defer rdr.Close()
	outFile, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("create output file: %w", err)
	}
	defer func() {
		// The success path closes outFile explicitly below; this cleanup only
		// runs when an error is being returned, closing the handle and
		// removing the partial output.
		if retErr != nil {
			outFile.Close()
			os.Remove(outputPath)
		}
	}()
	totalSize := rdr.OriginalSize()
	bar := newProgressBar("Extracting...", totalSize, "bytes")
	defer bar.Cancel() // clean up if we return early on error
	const chunkSize = 4 * 1024 * 1024
	chunk := make([]byte, chunkSize)
	for written := int64(0); written < totalSize; {
		want := totalSize - written
		if want > chunkSize {
			want = chunkSize
		}
		n, readErr := rdr.ReadAt(chunk[:want], written)
		if readErr != nil && readErr != io.EOF {
			return fmt.Errorf("read at offset %d: %w", written, readErr)
		}
		if n == 0 {
			// Reader produced nothing before we reached totalSize.
			return fmt.Errorf("unexpected EOF at offset %d (expected %d bytes)", written, totalSize)
		}
		if _, writeErr := outFile.Write(chunk[:n]); writeErr != nil {
			return fmt.Errorf("write at offset %d: %w", written, writeErr)
		}
		written += int64(n)
		bar.Update(written)
	}
	bar.Finish()
	if err := outFile.Close(); err != nil {
		return fmt.Errorf("close output: %w", err)
	}
	printInfo("\nExtracted %s bytes to %s\n", formatInt(totalSize), outputPath)
	return nil
}
package main
import (
"fmt"
"os"
)
// printUsage prints the top-level help text to stdout, including the
// build-dependent debug options section from debugOptionsHelp().
func printUsage() {
	fmt.Print(`mkvdup - MKV deduplication tool using FUSE
Usage: mkvdup [options] <command> [args...]
Commands:
create Create dedup file from MKV + source directory
batch-create Create multiple dedup files from one source
probe Quick test if MKV matches source(s)
mount Mount dedup files as FUSE filesystem
info Show dedup file information
verify Verify dedup file against original MKV
extract Rebuild original MKV from dedup + source
check Check dedup + source file integrity
stats Show space savings and file statistics
validate Validate configuration files
reload Reload running daemon's configuration
expand-config Expand wildcard config to explicit file list
relocate Move dedup file + sidecar, updating paths
Analysis commands:
deltadiag Analyze unmatched regions by stream type
Debug commands:
parse-mkv Parse MKV and show packet info
index-source Index source directory
match Match MKV packets to source
Options:
-v, --verbose Enable verbose output
-q, --quiet Suppress informational progress output
--no-progress Disable progress bars (still show status messages)
--log-file PATH Duplicate output to a log file (non-TTY style)
--log-verbose Enable verbose output in log file only
-h, --help Show help
--version Show version
`)
	fmt.Print(debugOptionsHelp())
	fmt.Print(`Run 'mkvdup <command> --help' for more information on a command.
See 'man mkvdup' for detailed documentation.
`)
}
// printCommandUsage prints the help text for a specific command, falling
// back to the top-level usage for unknown commands.
func printCommandUsage(cmd string) {
	usageFns := map[string]func(){
		"create":        printCreateUsage,
		"batch-create":  printBatchCreateUsage,
		"probe":         printProbeUsage,
		"mount":         printMountUsage,
		"info":          printInfoUsage,
		"verify":        printVerifyUsage,
		"extract":       printExtractUsage,
		"check":         printCheckUsage,
		"stats":         printStatsUsage,
		"validate":      printValidateUsage,
		"reload":        printReloadUsage,
		"expand-config": printExpandConfigUsage,
		"relocate":      printRelocateUsage,
		"deltadiag":     printDeltadiagUsage,
		"parse-mkv":     printParseMKVUsage,
		"index-source":  printIndexSourceUsage,
		"match":         printMatchUsage,
	}
	if fn, ok := usageFns[cmd]; ok {
		fn()
		return
	}
	printUsage()
}
// printCreateUsage prints the help text for the "create" command.
func printCreateUsage() {
	fmt.Print(`Usage: mkvdup create [options] <mkv-file> <source-dir> <output> [name]
Create a dedup file from an MKV and its source media.
Arguments:
<mkv-file> Path to the MKV file to deduplicate
<source-dir> Directory containing source media (ISO files or BDMV folders)
<output> Output .mkvdup file path
[name] Display name in FUSE mount (default: basename of mkv-file;
.mkv extension auto-added if missing)
Options:
-v, --verbose Enable verbose/debug output
--log-file PATH Duplicate output to a log file (non-TTY style)
--log-verbose Enable verbose output in log file only
--warn-threshold N Minimum space savings percentage to avoid warning (default: 75)
--non-interactive Don't prompt on codec mismatch (show warning and continue)
Before matching, codecs in the MKV are compared against the source media.
If a mismatch is detected (e.g., MKV has H.264 but source is MPEG-2), you
will be prompted to continue. Use --non-interactive for scripted usage.
After writing, the dedup file is verified against the original MKV. If
verification fails, the output is renamed to <output>.failed and the
command exits with code 1.
Examples:
mkvdup create movie.mkv /media/dvd-backups movie.mkvdup
mkvdup create movie.mkv /media/dvd-backups movie.mkvdup "My Movie"
mkvdup create --warn-threshold 50 movie.mkv /media/dvd-backups movie.mkvdup
mkvdup create --non-interactive movie.mkv /media/dvd-backups movie.mkvdup
`)
}
// printBatchCreateUsage prints the help text for the "batch-create" command,
// including the YAML manifest format.
func printBatchCreateUsage() {
	fmt.Print(`Usage: mkvdup batch-create [options] <manifest.yaml>
Create multiple dedup files from a YAML manifest. Files sharing the same
source directory are grouped and the source is indexed once per group.
Codec compatibility is checked for each file. If a mismatch is detected,
a warning is printed but processing continues (non-interactive mode).
Use --skip-codec-mismatch to skip mismatched files instead.
Arguments:
<manifest.yaml> YAML manifest file specifying source(s) and MKV files
Options:
-v, --verbose Enable verbose/debug output
--log-file PATH Duplicate output to a log file (non-TTY style)
--log-verbose Enable verbose output in log file only
--warn-threshold N Minimum space savings percentage to avoid warning (default: 75)
--skip-codec-mismatch Skip MKVs with codec mismatch instead of processing them
Manifest format:
source_dir: /media/dvd-backups/disc1 # default for all files (optional)
files:
- mkv: episode1.mkv
output: episode1.mkvdup
name: "Show/S01/Episode 1" # optional (.mkv auto-added)
- mkv: episode2.mkv
output: episode2.mkvdup
- mkv: movie.mkv
output: movie.mkvdup
source_dir: /media/dvd-backups/disc2 # per-file override
Fields:
source_dir Default source directory (optional if all files specify their own)
files List of MKV files to process (required, at least one)
files[].mkv Path to MKV file (required)
files[].output Output .mkvdup file (required)
files[].source_dir Source directory for this file (overrides top-level default)
files[].name Display name in FUSE mount (default: basename of mkv;
.mkv extension auto-added if missing)
Relative paths are resolved against the manifest file's directory.
Partial failure handling:
If one file fails, processing continues for the remaining files.
If verification fails for a file, the output is renamed to <output>.failed
and shown as FAIL in the summary.
Exit code is 0 if any file succeeded (including cached outputs from
prior runs), or if all files were skipped.
Exit code is 1 only if all processed files failed.
Examples:
mkvdup batch-create episodes.yaml
mkvdup batch-create --warn-threshold 50 episodes.yaml
mkvdup batch-create --skip-codec-mismatch episodes.yaml
`)
}
// printProbeUsage prints the help text for the "probe" command.
func printProbeUsage() {
	fmt.Print(`Usage: mkvdup probe <mkv-file>... -- <source-dir>...
Quick test to check if MKV file(s) match one or more source directories.
When multiple MKVs are provided, each source is indexed only once.
Arguments:
<mkv-file> One or more MKV files to test (before --)
-- Separator between MKV files and source directories
<source-dir> One or more directories to test against (after --)
For backward compatibility, a single MKV without -- is also supported:
mkvdup probe movie.mkv /media/disc1 /media/disc2
Examples:
mkvdup probe movie.mkv /media/disc1 /media/disc2
mkvdup probe ep1.mkv ep2.mkv ep3.mkv -- /media/disc1 /media/disc2
`)
}
// printMountUsage prints the help text for the "mount" command.
// Uses os.Stdout.WriteString rather than fmt.Print to emit the large help
// string directly.
func printMountUsage() {
	os.Stdout.WriteString(`Usage: mkvdup mount [options] <mountpoint> [config.yaml...]
Mount dedup files as a FUSE filesystem.
Arguments:
<mountpoint> Directory to mount the filesystem
[config.yaml] YAML config files (default: /etc/mkvdup.conf)
Options:
--allow-other Allow other users to access the mount
--foreground Run in foreground (for debugging or systemd)
--config-dir Treat config argument as directory of YAML files (.yaml, .yml)
--pid-file PATH Write daemon PID to file
--daemon-timeout DUR Timeout waiting for daemon startup (default: 30s)
Permission Options:
--default-uid UID Default UID for files and directories (default: calling user's UID)
--default-gid GID Default GID for files and directories (default: calling user's GID)
--default-file-mode MODE Default mode for files (octal, default: 0444)
--default-dir-mode MODE Default mode for directories (octal, default: 0555)
--permissions-file PATH Path to permissions file (overrides default locations)
Source Watch Options:
--no-source-watch Disable source file monitoring (enabled by default)
--on-source-change ACTION Action on source change: warn, disable, checksum (default)
warn - log a warning
disable - disable affected virtual files (reads return EIO)
checksum - size change: disable immediately
timestamp-only: verify checksum in background,
disable on mismatch, re-enable on pass
--source-watch-poll-interval DUR Poll interval for source file changes (default: 60s)
--source-read-timeout DUR Read timeout for network FS sources (default: 30s)
Config Watch Options:
--no-config-watch Disable config file monitoring (enabled by default)
--on-config-change ACTION Action on config change: reload (default), warn
reload - automatically reload configuration
warn - log a warning only
Error Notification (configured in YAML config, not CLI):
on_error_command:
command: ["/path/to/script", "%source%", "%event%", "%files%"]
timeout: 30s # command timeout (default: 30s)
batch_interval: 5s # debounce window for batching events (default: 5s)
Placeholders: %source% (path), %files% (affected files), %event% (error type)
String form (sh -c) auto-escapes placeholders; do not add your own quotes.
See docs/FUSE.md for details.
By default, mkvdup daemonizes after the mount is ready and returns.
Use --foreground to keep it attached to the terminal.
Permission files are searched in order:
1. --permissions-file (if specified)
2. ~/.config/mkvdup/permissions.yaml (if exists)
3. /etc/mkvdup/permissions.yaml (if exists)
New permissions are written to ~/.config/mkvdup/permissions.yaml (user) or
/etc/mkvdup/permissions.yaml (root).
Examples:
mkvdup mount /mnt/videos movie.mkvdup.yaml
mkvdup mount /mnt/videos *.yaml
mkvdup mount --allow-other /mnt/videos
mkvdup mount --config-dir /mnt/videos /etc/mkvdup.d/
mkvdup mount --foreground /mnt/videos config.yaml
mkvdup mount --default-uid 1000 --default-gid 1000 /mnt/videos config.yaml
mkvdup mount --source-watch-poll-interval 10s /mnt/videos config.yaml
mkvdup mount --source-read-timeout 1m /mnt/videos config.yaml
`)
}
// printInfoUsage prints the help text for the "info" command.
func printInfoUsage() {
	fmt.Print(`Usage: mkvdup info [options] <dedup-file>
Show information about a dedup file.
Arguments:
<dedup-file> Path to the .mkvdup file
Options:
--hide-unused-files Hide source files not referenced by any index entry
Examples:
mkvdup info movie.mkvdup
mkvdup info --hide-unused-files movie.mkvdup
`)
}
// printVerifyUsage prints the help text for the "verify" command.
func printVerifyUsage() {
	fmt.Print(`Usage: mkvdup verify <dedup-file> <source-dir> <original-mkv>
Verify that a dedup file correctly reconstructs the original MKV.
Arguments:
<dedup-file> Path to the .mkvdup file
<source-dir> Directory containing the source media
<original-mkv> Path to the original MKV for comparison
Examples:
mkvdup verify movie.mkvdup /media/dvd-backups original.mkv
`)
}
// printExtractUsage prints the help text for the "extract" command.
func printExtractUsage() {
	fmt.Print(`Usage: mkvdup extract <dedup-file> <source-dir> <output-mkv>
Rebuild the original MKV from a dedup file and source media.
Arguments:
<dedup-file> Path to the .mkvdup file
<source-dir> Directory containing the source media
<output-mkv> Path for the reconstructed MKV file
Examples:
mkvdup extract movie.mkvdup /media/dvd-backups restored-movie.mkv
`)
}
// printCheckUsage prints the help text for the "check" command.
func printCheckUsage() {
	fmt.Print(`Usage: mkvdup check <dedup-file> <source-dir> [options]
Check integrity of a dedup file and its source files.
Arguments:
<dedup-file> Path to the .mkvdup file
<source-dir> Directory containing the source media
Options:
--source-checksums Verify source file checksums (slow, reads entire files)
Checks performed:
- Dedup file header validity (magic, version, structure)
- Index and delta checksum verification
- Source file existence and size
With --source-checksums:
- Source file checksum verification (reads entire files)
Examples:
mkvdup check movie.mkvdup /media/dvd-backups
mkvdup check --source-checksums movie.mkvdup /media/dvd-backups
`)
}
// printStatsUsage prints the help text for the "stats" command.
func printStatsUsage() {
	fmt.Print(`Usage: mkvdup stats [options] <config.yaml...>
Show space savings and file statistics for mkvdup-managed files.
Arguments:
<config.yaml> YAML config files (same format as mount/validate)
Options:
--config-dir Treat config argument as directory of YAML files (.yaml, .yml)
Output includes per-file statistics (original size, dedup file size, space
savings, source type) and a rollup summary when multiple files are present.
Examples:
mkvdup stats config.yaml
mkvdup stats --config-dir /etc/mkvdup.d/
mkvdup stats movie1.yaml movie2.yaml
`)
}
// printValidateUsage prints the help text for the "validate" command.
func printValidateUsage() {
	fmt.Print(`Usage: mkvdup validate [options] <config.yaml...>
Validate configuration files for correctness before mounting.
Arguments:
<config.yaml> YAML config files to validate
Options:
--config-dir Treat config argument as directory of YAML files (.yaml, .yml)
--deep Verify dedup file headers and internal checksums
--strict Treat warnings as errors (exit 1 on warnings)
Validations performed:
- YAML syntax and required fields (name, dedup_file, source_dir)
- Include cycle detection
- Dedup file existence and header validity
- Source directory existence
- Duplicate virtual file names (warning)
- File/directory path conflicts (warning)
- Invalid path names (empty, contains "..")
With --deep:
- Dedup file internal checksum verification
Exit codes:
0 All configs valid (warnings may be present)
1 Errors found (or warnings with --strict)
Examples:
mkvdup validate config.yaml
mkvdup validate *.yaml
mkvdup validate --config-dir /etc/mkvdup.d/
mkvdup validate --deep --strict /etc/mkvdup.conf
`)
}
// printReloadUsage prints the help text for the "reload" command.
func printReloadUsage() {
	fmt.Print(`Usage: mkvdup reload {--pid-file PATH | --pid PID} [options] [config.yaml...]
Reload a running daemon's configuration by validating the config
and sending SIGHUP to the daemon process.
The config is validated BEFORE sending the signal. If validation
fails, the signal is not sent and the error is reported.
If no config files are specified, the signal is sent without
pre-validation (the daemon validates internally on SIGHUP).
Arguments:
[config.yaml] Config files to validate (same as mount's config args)
Required (one of):
--pid-file PATH PID file of running daemon (must match mount's --pid-file)
--pid PID PID of the running daemon (e.g., for foreground mode)
Options:
--config-dir Treat config argument as directory of YAML files
Examples:
mkvdup reload --pid-file /run/mkvdup.pid config.yaml
mkvdup reload --pid-file /run/mkvdup.pid --config-dir /etc/mkvdup.d/
mkvdup reload --pid-file /run/mkvdup.pid
mkvdup reload --pid $(pidof mkvdup)
`)
}
// printExpandConfigUsage prints the help text for the "expand-config"
// command. Uses os.Stdout.WriteString to emit the large help string directly.
func printExpandConfigUsage() {
	os.Stdout.WriteString(`Usage: mkvdup expand-config [options] <config-file>
Expand a mount config's include globs into explicit file paths.
Reads a standard mount config file (the same format accepted by mount,
validate, and reload), resolves its includes glob patterns to explicit
paths, and writes an expanded config. All other settings (on_error_command,
virtual_files, top-level mappings) are preserved unchanged. The included
files themselves are not modified and can still contain their own globs.
Arguments:
<config-file> Config file to expand (same format as mount)
Options:
--output PATH Write expanded config to PATH (default: stdout)
--dry-run Preview expanded output without writing
Example input (standard mount config):
includes:
- "/data/isos/dvds/**/*.mkvdup.yaml"
on_error_command:
command: ["curl", "-d", "%source%", "https://ntfy.sh/mkvdup"]
Output (globs resolved, all other settings preserved):
# Auto-generated by: mkvdup expand-config
# Source: /path/to/mount-config.yaml
# Generated: 2026-03-24T12:00:00Z
includes:
- "/data/isos/dvds/movie1/movie1.mkvdup.yaml"
- "/data/isos/dvds/movie2/movie2.mkvdup.yaml"
on_error_command:
command: ["curl", "-d", "%source%", "https://ntfy.sh/mkvdup"]
The output is a drop-in replacement for the original config, usable
directly with mount:
mkvdup mount /mnt/videos expanded-config.yaml
If the --output file already exists and the content is unchanged,
the file is not rewritten (avoiding unnecessary reloads).
Recommended workflow:
1. Keep a mount config with include globs as the source of truth
2. Run 'mkvdup expand-config config.yaml --output expanded.yaml'
3. Point the FUSE mount at expanded.yaml
4. When new .mkvdup.yaml files are added, re-run expand-config
5. Reload the mount: mkvdup reload --pid-file /run/mkvdup.pid
Examples:
mkvdup expand-config mount-config.yaml
mkvdup expand-config mount-config.yaml --output expanded.yaml
mkvdup expand-config --dry-run mount-config.yaml
`)
}
// printRelocateUsage prints the help text for the "relocate" command.
func printRelocateUsage() {
	fmt.Print(`Usage: mkvdup relocate [options] <source.mkvdup> <destination>
Move an .mkvdup file and its .mkvdup.yaml sidecar to a new location,
updating relative paths in the sidecar so they resolve to the same
absolute locations from the new position.
Arguments:
<source.mkvdup> Path to the .mkvdup file to move
<destination> Destination path (file or directory)
Options:
--dry-run Preview changes without moving files
--force Overwrite destination if it already exists
If <destination> is an existing directory, the file is moved into that
directory with its original filename. Otherwise, <destination> is used
as the new file path.
The .mkvdup.yaml sidecar (if present) is moved alongside the .mkvdup
file. The dedup_file path is updated to reference the new .mkvdup
location. The source_dir path is recalculated so it resolves to the
same absolute location from the new position (absolute source_dir
paths are preserved unchanged).
Before moving, the command validates that source directories referenced
by the sidecar would remain reachable from the new location. If not,
the move is refused.
Examples:
mkvdup relocate movie.mkvdup /new/location/movie.mkvdup
mkvdup relocate movie.mkvdup /new/location/
mkvdup relocate --dry-run movie.mkvdup /new/location/
mkvdup relocate --force movie.mkvdup /new/location/movie.mkvdup
`)
}
// printDeltadiagUsage prints the help text for the "deltadiag" analysis
// command.
func printDeltadiagUsage() {
	fmt.Print(`Usage: mkvdup deltadiag <dedup-file> <mkv-file>
Analyze unmatched (delta) regions in a dedup file by cross-referencing
with the original MKV to determine what stream type each delta region
belongs to (video, audio, or container overhead).
For video delta, further classifies by H.264 NAL type (IDR/non-IDR slices,
SEI, SPS, PPS, etc.) and shows size breakdown.
Works with dedup file versions 3 through 8 (DVD, Blu-ray, and newer).
Arguments:
<dedup-file> Path to the .mkvdup file
<mkv-file> Path to the original MKV file
Examples:
mkvdup deltadiag movie.mkvdup movie.mkv
`)
}
// printParseMKVUsage prints the help text for the "parse-mkv" debug command.
func printParseMKVUsage() {
	fmt.Print(`Usage: mkvdup parse-mkv <mkv-file>
Parse an MKV file and display packet information (debugging).
Arguments:
<mkv-file> Path to the MKV file to parse
Examples:
mkvdup parse-mkv movie.mkv
`)
}
// printIndexSourceUsage prints the help text for the "index-source" debug
// command.
func printIndexSourceUsage() {
	fmt.Print(`Usage: mkvdup index-source <source-dir>
Index a source directory and display statistics (debugging).
Arguments:
<source-dir> Directory containing source media (ISO files or BDMV folders)
Examples:
mkvdup index-source /media/dvd-backups
`)
}
// printMatchUsage prints the help text for the "match" debug command.
func printMatchUsage() {
	fmt.Print(`Usage: mkvdup match <mkv-file> <source-dir>
Match MKV packets to source and show detailed results (debugging).
Arguments:
<mkv-file> Path to the MKV file
<source-dir> Directory containing source media
Examples:
mkvdup match movie.mkv /media/dvd-backups
`)
}
// Command mkvdup is the CLI tool for MKV-ISO deduplication.
package main
import (
"fmt"
"io"
"log"
"os"
"strconv"
"strings"
"time"
"github.com/stuckj/mkvdup/internal/daemon"
"github.com/stuckj/mkvdup/internal/dedup"
)
// MountOptions holds all options for the mount command.
type MountOptions struct {
	AllowOther      bool          // Allow other users to access the mount
	Foreground      bool          // Run in the foreground instead of daemonizing
	ConfigDir       bool          // Treat the config argument as a directory of YAML files
	PidFile         string        // Path to write the daemon PID to (empty = no PID file)
	DaemonTimeout   time.Duration // How long to wait for daemon startup
	PermissionsFile string        // Explicit permissions file path (overrides the default search locations)
	DefaultUID      uint32        // Default UID for files and directories
	DefaultGID      uint32        // Default GID for files and directories
	DefaultFileMode uint32        // Default mode bits for files (octal)
	DefaultDirMode  uint32        // Default mode bits for directories (octal)
	NoSourceWatch   bool          // Disable source file watching
	OnSourceChange  string        // Action on source change: "warn", "disable", "checksum"
	SourceWatchPollInterval time.Duration // Poll interval for network FS source watching (0 = 60s default)
	SourceReadTimeout       time.Duration // Pread timeout for network FS sources (0 = disabled; CLI default 30s)
	OnErrorCommand *dedup.ErrorCommandConfig // External command to run on source integrity error (from YAML config)
	NoConfigWatch  bool   // Disable config file watching
	OnConfigChange string // Action on config change: "reload", "warn"
}
// parseUint32 parses a string as a base-10 uint32.
// On any parse or range error it returns 0 along with the error.
func parseUint32(s string) (uint32, error) {
	parsed, parseErr := strconv.ParseUint(s, 10, 32)
	if parseErr != nil {
		// Return an explicit zero: ParseUint yields the maximum value on
		// range errors, which callers should never see as a usable result.
		return 0, parseErr
	}
	return uint32(parsed), nil
}
// parseOctalMode parses a string as an octal file mode.
// ParseUint with an explicit base of 8 accepts the digits with or without a
// leading "0" (both "444" and "0444" parse as octal 444); no stripping is
// needed. Returns 0 along with the error on invalid input.
func parseOctalMode(s string) (uint32, error) {
	mode, parseErr := strconv.ParseUint(s, 8, 32)
	if parseErr != nil {
		return 0, parseErr
	}
	return uint32(mode), nil
}
// parseWarnFlags extracts --warn-threshold from args, returning the
// parsed value and the remaining positional arguments.
// The threshold defaults to 75.0 and must lie in [0, 100]; invalid usage
// aborts the program via log.Fatalf, matching the other flag parsers.
func parseWarnFlags(args []string) (warnThreshold float64, remaining []string) {
	warnThreshold = 75.0
	i := 0
	for i < len(args) {
		arg := args[i]
		if arg != "--warn-threshold" {
			remaining = append(remaining, arg)
			i++
			continue
		}
		// A following token that itself starts with "--" is another flag,
		// not a value for this one.
		if i+1 >= len(args) || strings.HasPrefix(args[i+1], "--") {
			log.Fatalf("Error: --warn-threshold requires a numeric argument")
		}
		parsed, err := strconv.ParseFloat(args[i+1], 64)
		if err != nil {
			log.Fatalf("Error: --warn-threshold invalid: %v", err)
		}
		if parsed < 0 || parsed > 100 {
			log.Fatalf("Error: --warn-threshold must be between 0 and 100")
		}
		warnThreshold = parsed
		i += 2
	}
	return warnThreshold, remaining
}
// isTerminalStdout returns true if stdout is a terminal (not piped/redirected).
// A Stat failure is treated conservatively as "not a terminal".
func isTerminalStdout() bool {
	info, err := os.Stdout.Stat()
	if err != nil {
		return false
	}
	// Character devices are terminals; pipes and regular files are not.
	return (info.Mode() & os.ModeCharDevice) != 0
}
// version is set at build time via -ldflags (defaults to "dev" for local builds)
var version = "dev"
// verbose is set to true when the -v/--verbose flag is passed
var verbose bool
// logVerbose enables verbose diagnostics only in the log file (not on console);
// set by --log-verbose
var logVerbose bool
// showProgress controls whether progress bars are rendered. Set to false by
// --no-progress, --quiet, or when stdout is not a TTY.
var showProgress = true
// quiet suppresses all informational stdout output. Errors still go to stderr.
var quiet bool
// printVersion prints the build version string to stdout.
func printVersion() {
	fmt.Println("mkvdup version " + version)
}
func main() {
// Process global flags before command
args := os.Args[1:]
var filteredArgs []string
showHelp := false
showVersion := false
// Extract --cpuprofile flag (only available in debug builds)
args, cpuprofile := parseCPUProfileFlag(args)
defer startCPUProfile(cpuprofile)()
for i := 0; i < len(args); i++ {
arg := args[i]
switch {
case arg == "-v" || arg == "--verbose":
verbose = true
case arg == "-h" || arg == "--help":
showHelp = true
case arg == "--version":
showVersion = true
case arg == "--log-verbose":
logVerbose = true
case arg == "--no-progress":
showProgress = false
case arg == "-q" || arg == "--quiet":
quiet = true
showProgress = false
case arg == "--log-file":
if i+1 < len(args) {
i++
var err error
logFile, err = os.Create(args[i])
if err != nil {
log.Fatalf("Error: cannot create log file %s: %v", args[i], err)
}
} else {
log.Fatalf("Error: --log-file requires a path argument")
}
default:
filteredArgs = append(filteredArgs, arg)
}
}
args = filteredArgs
// Auto-disable progress bars when stdout is not a TTY
if !isTerminalStdout() {
showProgress = false
}
// Duplicate log package output (used for warnings and fatal errors) to
// the log file so that log.Printf and log.Fatalf messages appear there too.
if logFile != nil {
log.SetOutput(io.MultiWriter(os.Stderr, logFile))
defer logFile.Close()
}
// Handle --version (always top-level)
if showVersion {
printVersion()
os.Exit(0)
}
// If no command given, show appropriate help
if len(args) < 1 {
if showHelp {
printUsage()
os.Exit(0)
}
printUsage()
os.Exit(1)
}
cmd := args[0]
args = args[1:]
// If help flag was given with a command, show command-specific help
if showHelp {
printCommandUsage(cmd)
os.Exit(0)
}
switch cmd {
case "create":
warnThreshold, remaining := parseWarnFlags(args)
nonInteractive := false
var createArgs []string
for i := 0; i < len(remaining); i++ {
switch remaining[i] {
case "--non-interactive":
nonInteractive = true
default:
createArgs = append(createArgs, remaining[i])
}
}
if len(createArgs) < 3 {
printCommandUsage("create")
os.Exit(1)
}
output := createArgs[2]
name := ""
if len(createArgs) >= 4 {
name = createArgs[3]
}
if err := createDedup(createArgs[0], createArgs[1], output, name, warnThreshold, nonInteractive); err != nil {
log.Fatalf("Error: %v", err)
}
case "batch-create":
warnThreshold, remaining := parseWarnFlags(args)
skipCodecMismatch := false
var batchArgs []string
for _, arg := range remaining {
if arg == "--skip-codec-mismatch" {
skipCodecMismatch = true
} else {
batchArgs = append(batchArgs, arg)
}
}
if len(batchArgs) < 1 {
printCommandUsage("batch-create")
os.Exit(1)
}
if err := createBatch(batchArgs[0], warnThreshold, skipCodecMismatch); err != nil {
log.Fatalf("Error: %v", err)
}
case "probe":
if len(args) < 2 {
printCommandUsage("probe")
os.Exit(1)
}
// Split on "--": MKVs before, sources after
// For backward compat: if no "--", first arg is MKV, rest are sources
var mkvPaths, sourceDirs []string
sepIdx := -1
for i, a := range args {
if a == "--" {
sepIdx = i
break
}
}
if sepIdx >= 0 {
mkvPaths = args[:sepIdx]
sourceDirs = args[sepIdx+1:]
} else {
mkvPaths = args[:1]
sourceDirs = args[1:]
}
if len(mkvPaths) == 0 || len(sourceDirs) == 0 {
printCommandUsage("probe")
os.Exit(1)
}
if err := probe(mkvPaths, sourceDirs); err != nil {
log.Fatalf("Error: %v", err)
}
case "mount":
// Parse mount-specific options
allowOther := false
foreground := false
configDir := false
pidFile := ""
daemonTimeout := 30 * time.Second
permissionsFile := ""
defaultUID := uint32(os.Getuid())
defaultGID := uint32(os.Getgid())
defaultFileMode := uint32(0444)
defaultDirMode := uint32(0555)
noSourceWatch := false
onSourceChange := "checksum"
sourceWatchPollInterval := time.Duration(0)
sourceReadTimeout := 30 * time.Second
noConfigWatch := false
onConfigChange := "reload"
var mountArgs []string
for i := 0; i < len(args); i++ {
switch args[i] {
case "--allow-other":
allowOther = true
case "--foreground", "-f":
foreground = true
case "--config-dir":
configDir = true
case "--pid-file":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
pidFile = args[i+1]
i++
} else {
log.Fatalf("Error: --pid-file requires a path argument")
}
case "--daemon-timeout":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
d, err := time.ParseDuration(args[i+1])
if err != nil {
log.Fatalf("Error: --daemon-timeout invalid duration: %v", err)
}
daemonTimeout = d
i++
} else {
log.Fatalf("Error: --daemon-timeout requires a duration argument (e.g., 30s, 1m)")
}
case "--permissions-file":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
permissionsFile = args[i+1]
i++
} else {
log.Fatalf("Error: --permissions-file requires a path argument")
}
case "--default-uid":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
uid, err := parseUint32(args[i+1])
if err != nil {
log.Fatalf("Error: --default-uid invalid: %v", err)
}
defaultUID = uid
i++
} else {
log.Fatalf("Error: --default-uid requires a numeric argument")
}
case "--default-gid":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
gid, err := parseUint32(args[i+1])
if err != nil {
log.Fatalf("Error: --default-gid invalid: %v", err)
}
defaultGID = gid
i++
} else {
log.Fatalf("Error: --default-gid requires a numeric argument")
}
case "--default-file-mode":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
mode, err := parseOctalMode(args[i+1])
if err != nil {
log.Fatalf("Error: --default-file-mode invalid: %v", err)
}
defaultFileMode = mode
i++
} else {
log.Fatalf("Error: --default-file-mode requires an octal mode argument")
}
case "--default-dir-mode":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
mode, err := parseOctalMode(args[i+1])
if err != nil {
log.Fatalf("Error: --default-dir-mode invalid: %v", err)
}
defaultDirMode = mode
i++
} else {
log.Fatalf("Error: --default-dir-mode requires an octal mode argument")
}
case "--no-source-watch":
noSourceWatch = true
case "--on-source-change":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
onSourceChange = args[i+1]
switch onSourceChange {
case "warn", "disable", "checksum":
// valid
default:
log.Fatalf("Error: --on-source-change must be warn, disable, or checksum")
}
i++
} else {
log.Fatalf("Error: --on-source-change requires an argument (warn, disable, or checksum)")
}
case "--source-watch-poll-interval":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
d, err := time.ParseDuration(args[i+1])
if err != nil {
log.Fatalf("Error: --source-watch-poll-interval invalid duration: %v", err)
}
if d <= 0 {
log.Fatalf("Error: --source-watch-poll-interval must be positive")
}
sourceWatchPollInterval = d
i++
} else {
log.Fatalf("Error: --source-watch-poll-interval requires a duration argument (e.g., 10s, 5m)")
}
case "--source-read-timeout":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
d, err := time.ParseDuration(args[i+1])
if err != nil {
log.Fatalf("Error: --source-read-timeout invalid duration: %v", err)
}
if d < 0 {
log.Fatalf("Error: --source-read-timeout must be non-negative")
}
sourceReadTimeout = d
i++
} else {
log.Fatalf("Error: --source-read-timeout requires a duration argument (e.g., 30s, 1m)")
}
case "--no-config-watch":
noConfigWatch = true
case "--on-config-change":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
onConfigChange = args[i+1]
switch onConfigChange {
case "reload", "warn":
// valid
default:
log.Fatalf("Error: --on-config-change must be reload or warn")
}
i++
} else {
log.Fatalf("Error: --on-config-change requires an argument (reload or warn)")
}
default:
mountArgs = append(mountArgs, args[i])
}
}
if len(mountArgs) < 1 {
printCommandUsage("mount")
os.Exit(1)
}
mountpoint := mountArgs[0]
configPaths := mountArgs[1:]
mountOpts := MountOptions{
AllowOther: allowOther,
Foreground: foreground,
ConfigDir: configDir,
PidFile: pidFile,
DaemonTimeout: daemonTimeout,
PermissionsFile: permissionsFile,
DefaultUID: defaultUID,
DefaultGID: defaultGID,
DefaultFileMode: defaultFileMode,
DefaultDirMode: defaultDirMode,
NoSourceWatch: noSourceWatch,
OnSourceChange: onSourceChange,
SourceWatchPollInterval: sourceWatchPollInterval,
SourceReadTimeout: sourceReadTimeout,
NoConfigWatch: noConfigWatch,
OnConfigChange: onConfigChange,
}
if err := mountFuse(mountpoint, configPaths, mountOpts); err != nil {
log.Fatalf("Error: %v", err)
}
case "info":
hideUnused := false
var infoArgs []string
for _, a := range args {
if a == "--hide-unused-files" {
hideUnused = true
} else {
infoArgs = append(infoArgs, a)
}
}
if len(infoArgs) < 1 {
printCommandUsage("info")
os.Exit(1)
}
if err := showInfo(infoArgs[0], hideUnused); err != nil {
log.Fatalf("Error: %v", err)
}
case "verify":
if len(args) < 3 {
printCommandUsage("verify")
os.Exit(1)
}
if err := verifyDedup(args[0], args[1], args[2]); err != nil {
log.Fatalf("Error: %v", err)
}
case "extract":
if len(args) < 3 {
printCommandUsage("extract")
os.Exit(1)
}
if err := extractDedup(args[0], args[1], args[2]); err != nil {
log.Fatalf("Error: %v", err)
}
case "check":
sourceChecksums := false
var checkArgs []string
for i := 0; i < len(args); i++ {
switch args[i] {
case "--source-checksums":
sourceChecksums = true
default:
checkArgs = append(checkArgs, args[i])
}
}
if len(checkArgs) < 2 {
printCommandUsage("check")
os.Exit(1)
}
if err := checkDedup(checkArgs[0], checkArgs[1], sourceChecksums); err != nil {
log.Fatalf("Error: %v", err)
}
case "stats":
configDir := false
var statsArgs []string
for _, arg := range args {
if arg == "--config-dir" {
configDir = true
} else {
statsArgs = append(statsArgs, arg)
}
}
if len(statsArgs) < 1 {
printCommandUsage("stats")
os.Exit(1)
}
if err := showStats(statsArgs, configDir); err != nil {
log.Fatalf("Error: %v", err)
}
case "validate":
configDir := false
deep := false
strict := false
var valArgs []string
for i := 0; i < len(args); i++ {
switch args[i] {
case "--config-dir":
configDir = true
case "--deep":
deep = true
case "--strict":
strict = true
default:
valArgs = append(valArgs, args[i])
}
}
if len(valArgs) < 1 {
printCommandUsage("validate")
os.Exit(1)
}
os.Exit(validateConfigs(valArgs, configDir, deep, strict))
case "reload":
if len(args) == 0 {
printCommandUsage("reload")
os.Exit(1)
}
pidFile := ""
pidDirect := 0
configDir := false
var reloadArgs []string
for i := 0; i < len(args); i++ {
switch args[i] {
case "--pid-file":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
pidFile = args[i+1]
i++
} else {
log.Fatalf("Error: --pid-file requires a path argument")
}
case "--pid":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
p, err := strconv.Atoi(args[i+1])
if err != nil || p <= 0 {
log.Fatalf("Error: --pid requires a positive integer argument")
}
pidDirect = p
i++
} else {
log.Fatalf("Error: --pid requires a PID argument")
}
case "--config-dir":
configDir = true
default:
reloadArgs = append(reloadArgs, args[i])
}
}
if pidFile != "" && pidDirect != 0 {
log.Fatalf("Error: --pid-file and --pid are mutually exclusive")
}
var pid int
if pidDirect != 0 {
pid = pidDirect
} else if pidFile != "" {
var err error
pid, err = daemon.ReadPidFile(pidFile)
if err != nil {
log.Fatalf("Error: %v", err)
}
} else {
log.Fatalf("Error: --pid-file or --pid is required for reload")
}
if err := reloadDaemon(pid, reloadArgs, configDir); err != nil {
log.Fatalf("Error: %v", err)
}
case "expand-config":
outputPath := ""
dryRun := false
var expandArgs []string
for i := 0; i < len(args); i++ {
switch args[i] {
case "--output":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
outputPath = args[i+1]
i++
} else {
log.Fatalf("Error: --output requires a path argument")
}
case "--dry-run":
dryRun = true
default:
expandArgs = append(expandArgs, args[i])
}
}
if len(expandArgs) != 1 {
printCommandUsage("expand-config")
os.Exit(1)
}
if err := expandConfigCmd(expandArgs[0], outputPath, dryRun); err != nil {
log.Fatalf("Error: %v", err)
}
case "relocate":
force := false
dryRun := false
var relocateArgs []string
for i := 0; i < len(args); i++ {
switch args[i] {
case "--force":
force = true
case "--dry-run":
dryRun = true
default:
relocateArgs = append(relocateArgs, args[i])
}
}
if len(relocateArgs) != 2 {
printCommandUsage("relocate")
os.Exit(1)
}
if err := relocateDedup(relocateArgs[0], relocateArgs[1], force, dryRun); err != nil {
log.Fatalf("Error: %v", err)
}
case "deltadiag":
if len(args) < 2 {
printCommandUsage("deltadiag")
os.Exit(1)
}
if err := deltadiag(args[0], args[1]); err != nil {
log.Fatalf("Error: %v", err)
}
case "parse-mkv":
if len(args) < 1 {
printCommandUsage("parse-mkv")
os.Exit(1)
}
if err := parseMKV(args[0]); err != nil {
log.Fatalf("Error: %v", err)
}
case "index-source":
if len(args) < 1 {
printCommandUsage("index-source")
os.Exit(1)
}
if err := indexSource(args[0]); err != nil {
log.Fatalf("Error: %v", err)
}
case "match":
if len(args) < 2 {
printCommandUsage("match")
os.Exit(1)
}
if err := matchMKV(args[0], args[1]); err != nil {
log.Fatalf("Error: %v", err)
}
case "help":
if len(args) > 0 {
printCommandUsage(args[0])
} else {
printUsage()
}
os.Exit(0)
default:
printWarn("Unknown command: %s\n\n", cmd)
printUsage()
os.Exit(1)
}
}
//go:build !debug
package main
// parseCPUProfileFlag is a no-op in release builds: the argument list is
// returned untouched and no profile path is reported.
// The --cpuprofile flag is only available in debug builds (go build -tags debug).
func parseCPUProfileFlag(args []string) ([]string, string) {
	const noProfilePath = ""
	return args, noProfilePath
}
// debugOptionsHelp contributes no extra usage text in release builds.
// Debug-only options are documented by the -tags debug variant of this file.
func debugOptionsHelp() string {
	var none string
	return none
}
// startCPUProfile does nothing in release builds; the returned stop
// function is likewise a no-op so callers can defer it unconditionally.
func startCPUProfile(_ string) func() {
	noop := func() {}
	return noop
}
package main
import (
"fmt"
"io"
"os"
"strings"
"time"
)
// logFile is set by --log-file to duplicate output to a file.
// Console output is unchanged; the log file receives non-TTY-style output
// (milestones instead of progress bars, no ANSI escape sequences).
// It stays nil when --log-file was not supplied; all writers nil-check it.
var logFile *os.File
// progressBar renders an in-place progress bar with ETA.
//
// When showProgress is true (TTY mode), it renders:
//
// Phase 2/6: Building source index...
// [████████████████████░░░░░░░░░░░░░░░░░░░░] 52% 2.3 GB / 4.5 GB ETA: 00:00:14
//
// On Finish(), the bar line is cleared and replaced with:
//
// Phase 2/6: Building source index... done (00:00:27)
//
// When showProgress is false (non-TTY), milestone percentages are printed at
// 10% intervals so redirected logs still show progress.
// When quiet is true, nothing is printed to stdout (log file still receives output).
type progressBar struct {
	prefix    string    // label line printed above the bar (e.g. "Phase 2/6: ...")
	total     int64     // total units of work; <= 0 disables drawing and milestones
	processed int64     // units completed so far, set via Update
	startTime time.Time // creation time, used for elapsed and ETA
	lastDraw  time.Time // last redraw time; draws are throttled to 500ms
	unit      string    // "bytes" or "packets"
	done      bool      // set by Finish/Cancel; further updates are ignored
	lastMilestone int   // last 10% milestone printed (0-10)
}
const barWidth = 40
// newProgressBar creates a progress bar and immediately announces its prefix
// line (e.g. "Phase 2/6: Building source index...") on stdout (unless quiet)
// and in the log file (if one is open). Unit should be "bytes" or "packets".
func newProgressBar(prefix string, total int64, unit string) *progressBar {
	bar := &progressBar{
		prefix:    prefix,
		total:     total,
		unit:      unit,
		startTime: time.Now(),
	}
	var sinks []io.Writer
	if !quiet {
		sinks = append(sinks, os.Stdout)
	}
	if logFile != nil {
		sinks = append(sinks, logFile)
	}
	for _, w := range sinks {
		fmt.Fprintln(w, prefix)
	}
	return bar
}
// Update records the current progress. Milestone output (non-TTY stdout
// and/or log file) is emitted as needed; the live bar is redrawn at most
// once per 500ms. No-op once the bar is finished or cancelled.
func (p *progressBar) Update(processed int64) {
	if p.done {
		return
	}
	p.processed = processed
	wantMilestones := (!showProgress && !quiet) || logFile != nil
	if wantMilestones {
		p.updateMilestone()
	}
	if quiet || !showProgress {
		return
	}
	// Throttle redraws so a tight update loop doesn't spam the terminal.
	if time.Since(p.lastDraw) < 500*time.Millisecond {
		return
	}
	p.lastDraw = time.Now()
	p.draw()
}
// updateMilestone emits a percentage line each time progress crosses a new
// 10% boundary. Output goes to stdout (when non-TTY) and/or the log file.
func (p *progressBar) updateMilestone() {
	if p.total <= 0 {
		return
	}
	current := int(float64(p.processed) / float64(p.total) * 100 / 10)
	if current > 10 {
		current = 10
	}
	if current <= p.lastMilestone {
		return
	}
	p.lastMilestone = current
	msg := fmt.Sprintf("  %d%% (%s)\n", current*10, formatDuration(time.Since(p.startTime)))
	if !showProgress && !quiet {
		fmt.Print(msg)
	}
	if logFile != nil {
		fmt.Fprint(logFile, msg)
	}
}
// Cancel abandons the bar on error without printing "done". In TTY mode it
// clears any partially drawn bar line and advances to a fresh line. Calling
// it after Finish() is a no-op.
func (p *progressBar) Cancel() {
	if p.done {
		return
	}
	p.done = true
	if showProgress && !quiet {
		// Erase the in-place bar and move past it.
		fmt.Print("\r\033[2K\n")
	}
}
// Finish marks the bar complete and prints "<prefix> done (HH:MM:SS)".
// In TTY mode the bar and prefix lines are erased first so the completion
// line replaces them in place. The log file always gets the plain line.
func (p *progressBar) Finish() {
	if p.done {
		return
	}
	p.done = true
	summary := fmt.Sprintf("%s done (%s)\n", p.prefix, formatDuration(time.Since(p.startTime)))
	if !quiet {
		if showProgress {
			// Erase bar line, move up, erase prefix line, then print summary.
			fmt.Print("\r\033[2K\033[A\r\033[2K" + summary)
		} else {
			fmt.Print(summary)
		}
	}
	if logFile != nil {
		fmt.Fprint(logFile, summary)
	}
}
// draw repaints the in-place bar line: fill cells, percentage, unit-specific
// counters, and ETA. A non-positive total disables drawing entirely.
func (p *progressBar) draw() {
	if p.total <= 0 {
		return
	}
	frac := float64(p.processed) / float64(p.total)
	if frac > 1.0 {
		frac = 1.0
	}
	filledCells := int(frac * float64(barWidth))
	if filledCells > barWidth {
		filledCells = barWidth
	}
	fill := strings.Repeat("█", filledCells)
	rest := strings.Repeat("░", barWidth-filledCells)
	// Counter text depends on what is being measured.
	var stats string
	switch p.unit {
	case "bytes":
		stats = fmt.Sprintf("%s / %s", formatSize(p.processed), formatSize(p.total))
	case "packets":
		stats = fmt.Sprintf("%s / %s", formatInt(p.processed), formatInt(p.total))
	}
	line := fmt.Sprintf("  [%s] %3.0f%% %s ETA: %s", fill+rest, frac*100, stats, p.eta())
	fmt.Printf("\r\033[2K%s", line)
}
// eta estimates the remaining time from the observed average rate.
// Returns the placeholder "--:--:--" during the first two seconds or while
// no progress has been made (the early rate is too noisy to be useful).
func (p *progressBar) eta() string {
	elapsed := time.Since(p.startTime)
	if elapsed < 2*time.Second || p.processed <= 0 || p.total <= 0 {
		return "--:--:--"
	}
	avgRate := float64(p.processed) / elapsed.Seconds()
	secsLeft := float64(p.total-p.processed) / avgRate
	if secsLeft < 0 {
		secsLeft = 0
	}
	return formatDuration(time.Duration(secsLeft * float64(time.Second)))
}
// formatDuration formats a duration as HH:MM:SS (hours are not capped at 24).
// Negative durations are clamped to zero: previously a negative input produced
// malformed output such as "00:00:-5" because each component is formatted
// independently with %02d.
func formatDuration(d time.Duration) string {
	if d < 0 {
		d = 0
	}
	d = d.Round(time.Second)
	h := int(d.Hours())
	m := int(d.Minutes()) % 60
	s := int(d.Seconds()) % 60
	return fmt.Sprintf("%02d:%02d:%02d", h, m, s)
}
// formatSize formats a byte count as a human-readable string using binary
// (1024-based) units. A TB tier is included so multi-terabyte totals render
// as e.g. "1.5 TB" instead of "1536.0 GB"; output for values below 1 TB is
// unchanged.
func formatSize(n int64) string {
	const (
		kb = int64(1024)
		mb = 1024 * kb
		gb = 1024 * mb
		tb = 1024 * gb
	)
	switch {
	case n >= tb:
		return fmt.Sprintf("%.1f TB", float64(n)/float64(tb))
	case n >= gb:
		return fmt.Sprintf("%.1f GB", float64(n)/float64(gb))
	case n >= mb:
		return fmt.Sprintf("%.1f MB", float64(n)/float64(mb))
	case n >= kb:
		return fmt.Sprintf("%.1f KB", float64(n)/float64(kb))
	default:
		return fmt.Sprintf("%d B", n)
	}
}
// printInfo writes informational output. Stdout is skipped when quiet is set;
// the log file (when open) always receives a copy.
func printInfo(format string, a ...any) {
	targets := make([]io.Writer, 0, 2)
	if !quiet {
		targets = append(targets, os.Stdout)
	}
	if logFile != nil {
		targets = append(targets, logFile)
	}
	for _, w := range targets {
		fmt.Fprintf(w, format, a...)
	}
}
// verboseWriter selects the destination for verbose diagnostic output:
//   - verbose + logFile → both stderr and log file (MultiWriter)
//   - verbose only      → stderr
//   - logVerbose + logFile → log file only
//   - otherwise         → nil (verbose disabled)
func verboseWriter() io.Writer {
	switch {
	case verbose && logFile != nil:
		return io.MultiWriter(os.Stderr, logFile)
	case verbose:
		return os.Stderr
	case logVerbose && logFile != nil:
		return logFile
	default:
		return nil
	}
}
// printInfoln writes an informational line. Stdout is skipped when quiet is
// set; the log file (when open) always receives a copy.
func printInfoln(a ...any) {
	targets := make([]io.Writer, 0, 2)
	if !quiet {
		targets = append(targets, os.Stdout)
	}
	if logFile != nil {
		targets = append(targets, logFile)
	}
	for _, w := range targets {
		fmt.Fprintln(w, a...)
	}
}
// printWarn writes warning/error output to stderr (never suppressed by quiet)
// and mirrors it to the log file when one is open.
func printWarn(format string, a ...any) {
	dests := []io.Writer{os.Stderr}
	if logFile != nil {
		dests = append(dests, logFile)
	}
	for _, w := range dests {
		fmt.Fprintf(w, format, a...)
	}
}
// printWarnln writes a warning/error line to stderr (never suppressed by
// quiet) and mirrors it to the log file when one is open.
func printWarnln(a ...any) {
	dests := []io.Writer{os.Stderr}
	if logFile != nil {
		dests = append(dests, logFile)
	}
	for _, w := range dests {
		fmt.Fprintln(w, a...)
	}
}
// Package daemon provides daemonization support for mkvdup FUSE mount.
//
// It uses a re-exec pattern where the parent process spawns a child with
// an environment variable marker. The child signals readiness to the parent
// via a pipe, allowing the parent to return success/failure appropriately.
package daemon
import (
"errors"
"fmt"
"io"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
"time"
"golang.org/x/sys/unix"
)
// childEnvVar is the environment variable that marks a child daemon process.
const childEnvVar = "MKVDUP_DAEMON_CHILD"
// readyPipeFdEnvVar is the environment variable containing the pipe fd for signaling.
const readyPipeFdEnvVar = "MKVDUP_READY_PIPE_FD"
// Status codes sent from child to parent via the ready pipe.
const (
	statusReady byte = 0 // Mount successful
	statusError byte = 1 // Mount failed; followed by the error message bytes (see NotifyError)
)
// IsChild reports whether the current process was spawned as a daemon child
// (i.e. the child marker environment variable is set to "1" by Daemonize).
func IsChild() bool {
	marker, ok := os.LookupEnv(childEnvVar)
	return ok && marker == "1"
}
// Daemonize spawns the current executable as a background daemon.
// It waits for the child to signal readiness or error via a pipe.
// Returns nil on success (child signaled ready) or error on failure.
// The timeout specifies how long to wait for the child to signal.
//
// NOTE(review): the child is re-executed via os.Args[0], which may be a
// relative path — assumes the working directory is unchanged; confirm.
func Daemonize(pidFile string, timeout time.Duration) error {
	// Create pipe for child to signal readiness
	readPipe, writePipe, err := os.Pipe()
	if err != nil {
		return fmt.Errorf("create pipe: %w", err)
	}
	// Also closed explicitly in the timeout branch; the redundant Close there
	// is harmless and serves to unblock the reader goroutine.
	defer readPipe.Close()
	// Build command with same arguments
	cmd := exec.Command(os.Args[0], os.Args[1:]...)
	// Set up environment
	cmd.Env = append(os.Environ(),
		childEnvVar+"=1",
		readyPipeFdEnvVar+"=3", // fd 3 is after stdin/stdout/stderr
	)
	// Pass write end of pipe to child as fd 3
	// (ExtraFiles entry 0 maps to fd 3 in the child.)
	cmd.ExtraFiles = []*os.File{writePipe}
	// Detach from terminal
	cmd.Stdin = nil
	cmd.Stdout = nil
	cmd.Stderr = nil
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setsid: true, // Create new session
	}
	// Start child process
	if err := cmd.Start(); err != nil {
		writePipe.Close()
		return fmt.Errorf("start daemon: %w", err)
	}
	// Close write end in parent (child has it). After this, EOF on readPipe
	// means the child exited without writing a status byte.
	writePipe.Close()
	// Wait for child to signal with timeout. The channel is buffered so the
	// goroutine never blocks on send if the timeout branch wins the select.
	resultChan := make(chan error, 1)
	go func() {
		status := make([]byte, 1)
		n, err := readPipe.Read(status)
		if err != nil {
			if errors.Is(err, io.EOF) {
				resultChan <- fmt.Errorf("daemon child exited unexpectedly")
			} else {
				resultChan <- fmt.Errorf("read from child: %w", err)
			}
			return
		}
		if n != 1 {
			resultChan <- fmt.Errorf("unexpected read size from child: %d", n)
			return
		}
		if status[0] == statusReady {
			resultChan <- nil
		} else {
			// Read full error message until EOF to avoid truncation
			// (NotifyError writes the status byte, then the message text).
			errMsg, readErr := io.ReadAll(readPipe)
			if readErr != nil && !errors.Is(readErr, io.EOF) {
				resultChan <- fmt.Errorf("daemon failed (error reading message): %v", readErr)
				return
			}
			if len(errMsg) > 0 {
				resultChan <- fmt.Errorf("daemon failed: %s", string(errMsg))
			} else {
				resultChan <- fmt.Errorf("daemon failed with unknown error")
			}
		}
	}()
	select {
	case err := <-resultChan:
		if err != nil {
			// Try to clean up the child
			if cmd.Process != nil {
				cmd.Process.Kill()
			}
			return err
		}
		// Success - child is running and mount is ready
		if pidFile != "" {
			// Write PID file from parent since child may not have permission
			if err := WritePidFile(pidFile, cmd.Process.Pid); err != nil {
				fmt.Fprintf(os.Stderr, "warning: failed to write pid file: %v\n", err)
			}
		}
		return nil
	case <-time.After(timeout):
		// Close pipe to unblock the goroutine waiting on Read()
		readPipe.Close()
		if cmd.Process != nil {
			cmd.Process.Kill()
		}
		return fmt.Errorf("daemon startup timed out after %v", timeout)
	}
}
// NotifyReady reports a successful mount to the waiting parent process by
// writing the ready status byte to the inherited pipe. The child calls this
// once the FUSE mount is up.
func NotifyReady() error {
	fd, fdErr := getReadyPipeFd()
	if fdErr != nil {
		return fdErr
	}
	readyPipe := os.NewFile(fd, "ready-pipe")
	if readyPipe == nil {
		return fmt.Errorf("invalid pipe fd")
	}
	defer readyPipe.Close()
	_, writeErr := readyPipe.Write([]byte{statusReady})
	return writeErr
}
// NotifyError reports a startup failure to the waiting parent process: the
// error status byte first, then the error text so the parent can relay it.
// The child calls this if anything goes wrong before the mount is ready.
func NotifyError(mountErr error) error {
	fd, fdErr := getReadyPipeFd()
	if fdErr != nil {
		return fdErr
	}
	errPipe := os.NewFile(fd, "ready-pipe")
	if errPipe == nil {
		return fmt.Errorf("invalid pipe fd")
	}
	defer errPipe.Close()
	if _, err := errPipe.Write([]byte{statusError}); err != nil {
		return err
	}
	_, err := errPipe.Write([]byte(mountErr.Error()))
	return err
}
// getReadyPipeFd reads the ready-pipe file descriptor number from the
// environment variable set by Daemonize. Errors if the variable is absent
// (not a daemon child) or not a valid unsigned integer.
func getReadyPipeFd() (uintptr, error) {
	raw := os.Getenv(readyPipeFdEnvVar)
	if raw == "" {
		return 0, fmt.Errorf("not running as daemon child")
	}
	parsed, err := strconv.ParseUint(raw, 10, strconv.IntSize)
	if err != nil {
		return 0, fmt.Errorf("invalid pipe fd: %w", err)
	}
	return uintptr(parsed), nil
}
// Detach closes stdin, stdout, and stderr to fully detach from the terminal.
// This should be called by the child after signaling ready.
func Detach() {
	// Redirect standard file descriptors to /dev/null
	devNull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
	if err != nil {
		fmt.Fprintf(os.Stderr, "daemon: failed to open /dev/null: %v\n", err)
		return
	}
	// Replace stdin, stdout, stderr with /dev/null
	// Use unix.Dup2 for cross-architecture compatibility (syscall.Dup2 not available on arm64)
	// Errors are logged but not fatal since the daemon can still function.
	// stderr is redirected last so the earlier failure messages still reach it.
	if err := unix.Dup2(int(devNull.Fd()), int(os.Stdin.Fd())); err != nil {
		fmt.Fprintf(os.Stderr, "daemon: failed to redirect stdin: %v\n", err)
	}
	if err := unix.Dup2(int(devNull.Fd()), int(os.Stdout.Fd())); err != nil {
		fmt.Fprintf(os.Stderr, "daemon: failed to redirect stdout: %v\n", err)
	}
	if err := unix.Dup2(int(devNull.Fd()), int(os.Stderr.Fd())); err != nil {
		// stderr may already be redirected, best effort
		_ = err
	}
	// The original fd is no longer needed: dup2 left copies on fds 0/1/2.
	if err := devNull.Close(); err != nil {
		// Can't log since stderr may be redirected
		_ = err
	}
}
// WritePidFile writes the given PID to a file.
func WritePidFile(path string, pid int) error {
return os.WriteFile(path, []byte(strconv.Itoa(pid)+"\n"), 0644)
}
// RemovePidFile removes the PID file at the given path.
// It returns the os.Remove error unchanged (e.g. fs.ErrNotExist if absent).
func RemovePidFile(path string) error {
	return os.Remove(path)
}
// ReadPidFile reads a PID from the given file path.
func ReadPidFile(path string) (int, error) {
data, err := os.ReadFile(path)
if err != nil {
return 0, fmt.Errorf("read pid file: %w", err)
}
pidStr := strings.TrimSpace(string(data))
pid, err := strconv.Atoi(pidStr)
if err != nil {
return 0, fmt.Errorf("invalid pid in %s: %w", path, err)
}
if pid <= 0 {
return 0, fmt.Errorf("invalid pid %d in %s", pid, path)
}
return pid, nil
}
package dedup
import (
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/bmatcuk/doublestar/v4"
"github.com/stuckj/mkvdup/internal/security"
"gopkg.in/yaml.v3"
)
// Config represents the contents of a .mkvdup.yaml file.
// All three fields are required; ReadConfig rejects a config missing any.
type Config struct {
	Name      string `yaml:"name"`       // entry name
	DedupFile string `yaml:"dedup_file"` // path to the dedup file
	SourceDir string `yaml:"source_dir"` // path to the source directory
}
// configFile is the internal YAML representation that supports includes
// and virtual_files in addition to the standard Config fields.
// The top-level Name/DedupFile/SourceDir triple is all-or-nothing
// (enforced by validateConfigFields).
type configFile struct {
	Name         string              `yaml:"name,omitempty"`
	DedupFile    string              `yaml:"dedup_file,omitempty"`
	SourceDir    string              `yaml:"source_dir,omitempty"`
	Includes     []string            `yaml:"includes,omitempty"`      // glob patterns of further configs to load
	VirtualFiles []Config            `yaml:"virtual_files,omitempty"` // additional entries; each must be complete
	OnErrorCommand *ErrorCommandConfig `yaml:"on_error_command,omitempty"` // first one seen wins across resolution
}
// ErrorCommandConfig configures an external command to run when a source
// integrity issue is detected. Placeholders in command arguments (%source%,
// %files%, %event%) are substituted at runtime.
type ErrorCommandConfig struct {
	Command       CommandValue  `yaml:"command"`                  // command to execute (string or arg list)
	Timeout       time.Duration `yaml:"timeout,omitempty"`        // defaults to 30s via applyDefaults when unset
	BatchInterval time.Duration `yaml:"batch_interval,omitempty"` // defaults to 5s via applyDefaults when unset
}
// applyDefaults replaces non-positive Timeout and BatchInterval values with
// the package defaults (30s and 5s respectively).
func (c *ErrorCommandConfig) applyDefaults() {
	const (
		fallbackTimeout  = 30 * time.Second
		fallbackInterval = 5 * time.Second
	)
	if c.Timeout <= 0 {
		c.Timeout = fallbackTimeout
	}
	if c.BatchInterval <= 0 {
		c.BatchInterval = fallbackInterval
	}
}
// CommandValue supports both string and []string YAML formats.
// A string value is executed via "sh -c"; a list is executed directly.
// See UnmarshalYAML/MarshalYAML for the two-way YAML mapping.
type CommandValue struct {
	IsShell bool // true if the original YAML was a string (run via sh -c)
	Args []string // for shell: single element with the command string; for list: the arg list
}
// UnmarshalYAML decodes a CommandValue from either YAML form: a scalar
// string (shell command, run via sh -c) or a sequence of strings (direct
// argv). Empty values and any other node kind are rejected.
func (c *CommandValue) UnmarshalYAML(value *yaml.Node) error {
	switch value.Kind {
	case yaml.ScalarNode:
		var shellCmd string
		if err := value.Decode(&shellCmd); err != nil {
			return err
		}
		if shellCmd == "" {
			return fmt.Errorf("on_error_command: command must not be empty")
		}
		c.IsShell = true
		c.Args = []string{shellCmd}
		return nil
	case yaml.SequenceNode:
		var argv []string
		if err := value.Decode(&argv); err != nil {
			return err
		}
		if len(argv) == 0 {
			return fmt.Errorf("on_error_command: command list must not be empty")
		}
		c.IsShell = false
		c.Args = argv
		return nil
	default:
		return fmt.Errorf("on_error_command: command must be a string or list of strings")
	}
}
// MarshalYAML emits the form the value was originally parsed from: shell
// commands (string form) become a scalar, everything else a sequence.
func (c CommandValue) MarshalYAML() (interface{}, error) {
	if !c.IsShell || len(c.Args) != 1 {
		return c.Args, nil
	}
	return c.Args[0], nil
}
// WriteConfig writes the .mkvdup.yaml config file.
func WriteConfig(configPath, name, dedupFile, sourceDir string) error {
content := fmt.Sprintf(`# Auto-generated by mkvdup create
name: %q
dedup_file: %q
source_dir: %q
`, name, dedupFile, sourceDir)
return os.WriteFile(configPath, []byte(content), 0644)
}
// ReadConfig loads and validates a .mkvdup.yaml config file. All three
// fields (name, dedup_file, source_dir) must be present and non-empty.
func ReadConfig(configPath string) (*Config, error) {
	raw, err := os.ReadFile(configPath)
	if err != nil {
		return nil, fmt.Errorf("read config file: %w", err)
	}
	cfg := &Config{}
	if err := yaml.Unmarshal(raw, cfg); err != nil {
		return nil, fmt.Errorf("parse config %s: %w", configPath, err)
	}
	if cfg.Name == "" || cfg.DedupFile == "" || cfg.SourceDir == "" {
		return nil, fmt.Errorf("invalid config: missing required fields")
	}
	return cfg, nil
}
// ResolveConfigs expands the given config files — recursively following
// includes and flattening virtual_files — into a flat list of Config
// entries. A shared seen-set provides cycle detection across all roots.
//
// The first on_error_command encountered (depth-first, in file order) wins;
// its omitted timeout/batch_interval fields get defaults applied.
//
// loadedPaths lists the absolute, symlink-resolved path of every config
// file that loaded successfully, sorted — handy for file watchers.
func ResolveConfigs(configPaths []string) (configs []Config, errorCmd *ErrorCommandConfig, loadedPaths []string, err error) {
	visited := make(map[string]bool)
	for _, path := range configPaths {
		resolved, cmd, resolveErr := resolveConfig(path, visited)
		if resolveErr != nil {
			return nil, nil, nil, resolveErr
		}
		configs = append(configs, resolved...)
		if cmd != nil && errorCmd == nil {
			errorCmd = cmd
		}
	}
	if errorCmd != nil {
		if len(errorCmd.Command.Args) == 0 {
			return nil, nil, nil, fmt.Errorf("invalid on_error_command: missing command")
		}
		errorCmd.applyDefaults()
	}
	loadedPaths = make([]string, 0, len(visited))
	for path := range visited {
		loadedPaths = append(loadedPaths, path)
	}
	sort.Strings(loadedPaths)
	return configs, errorCmd, loadedPaths, nil
}
// configVisitor is called for each config file visited during the walk.
// realPath is the resolved absolute path of the config file.
// cf is the parsed config file contents.
// configDir is the directory containing the config file (for resolving relative paths).
// phase indicates when the visitor is called:
// - "pre" — before recursing into includes (for top-level config entries)
// - "post" — after includes have been recursed (for virtual_files)
// Returning a non-nil error aborts the walk immediately.
type configVisitor func(phase string, realPath string, cf *configFile, configDir string) error
// walkConfig recursively walks a config file tree, calling the visitor for each
// file. It handles path resolution, symlink resolution, cycle detection,
// ownership checks, YAML parsing, and includes glob expansion — the shared
// logic that all config resolution uses.
//
// The visitor is called twice per file: once with phase "pre" before processing
// includes, and once with phase "post" after. This preserves the original
// ordering: top-level entries → included configs → virtual_files.
// openConfigFile canonicalizes a config path (absolute, symlinks resolved),
// verifies file ownership, then reads and parses the YAML. Returns the
// canonical real path, the raw bytes, and the parsed config.
func openConfigFile(configPath string) (realPath string, data []byte, cf *configFile, err error) {
	var absPath string
	if absPath, err = filepath.Abs(configPath); err != nil {
		return "", nil, nil, fmt.Errorf("resolve path %s: %w", configPath, err)
	}
	if realPath, err = filepath.EvalSymlinks(absPath); err != nil {
		return "", nil, nil, fmt.Errorf("resolve symlinks %s: %w", absPath, err)
	}
	// Reject configs not owned appropriately before reading them.
	if ownErr := security.CheckFileOwnershipResolved(realPath); ownErr != nil {
		return "", nil, nil, fmt.Errorf("config file %s: %w", realPath, ownErr)
	}
	if data, err = os.ReadFile(realPath); err != nil {
		return "", nil, nil, fmt.Errorf("read config file %s: %w", realPath, err)
	}
	parsed := &configFile{}
	if err = yaml.Unmarshal(data, parsed); err != nil {
		return "", nil, nil, fmt.Errorf("parse config %s: %w", realPath, err)
	}
	return realPath, data, parsed, nil
}
// validateConfigFields rejects configs where only some of the top-level
// triple (name, dedup_file, source_dir) is set, and virtual_files entries
// missing any required field.
func validateConfigFields(realPath string, cf *configFile) error {
	populated := 0
	for _, field := range []string{cf.Name, cf.DedupFile, cf.SourceDir} {
		if field != "" {
			populated++
		}
	}
	// The triple is all-or-nothing.
	if populated != 0 && populated != 3 {
		return fmt.Errorf("config %s: name, dedup_file, and source_dir must all be set if any is set", realPath)
	}
	for _, vf := range cf.VirtualFiles {
		if vf.Name == "" || vf.DedupFile == "" || vf.SourceDir == "" {
			return fmt.Errorf("config %s: virtual_files entry missing required fields (name, dedup_file, source_dir)", realPath)
		}
	}
	return nil
}
// walkConfig recursively walks a config file tree, calling visit with phase
// "pre" before expanding includes and "post" after, so ordering is:
// top-level entries → included configs → virtual_files. The seen set holds
// canonical paths for cycle detection across the whole walk.
func walkConfig(configPath string, seen map[string]bool, visit configVisitor) error {
	// openConfigFile resolves abs + symlinks, checks ownership, reads, parses.
	realPath, _, cf, err := openConfigFile(configPath)
	if err != nil {
		return err
	}
	// Cycle detection using the canonical path from openConfigFile.
	// (A repeat visit is skipped with a warning, not treated as an error.)
	if seen[realPath] {
		log.Printf("warning: skipping already-seen config %s (cycle detection)", realPath)
		return nil
	}
	seen[realPath] = true
	configDir := filepath.Dir(realPath)
	// Pre-includes visit (top-level config entries).
	if err := visit("pre", realPath, cf, configDir); err != nil {
		return err
	}
	// Recurse into includes.
	for _, pattern := range cf.Includes {
		// Patterns are relative to the including file's directory.
		pattern = resolveRelative(configDir, pattern)
		matches, err := doublestar.FilepathGlob(pattern)
		if err != nil {
			return fmt.Errorf("expand include pattern %q in %s: %w", pattern, realPath, err)
		}
		// Sort for deterministic resolution order across runs.
		sort.Strings(matches)
		for _, match := range matches {
			if err := walkConfig(match, seen, visit); err != nil {
				return err
			}
		}
	}
	// Post-includes visit (virtual_files).
	if err := visit("post", realPath, cf, configDir); err != nil {
		return err
	}
	return nil
}
// resolveConfig walks a single config file (and its includes, recursively)
// and collects the resulting Config entries in walk order, along with the
// first on_error_command encountered anywhere in the resolution (or nil).
func resolveConfig(configPath string, seen map[string]bool) ([]Config, *ErrorCommandConfig, error) {
	var (
		out      []Config
		errorCmd *ErrorCommandConfig
	)
	visitor := func(phase, realPath string, cf *configFile, configDir string) error {
		if phase != "pre" {
			// "post" phase: emit virtual_files entries. They were already
			// validated during this file's "pre" phase.
			for _, vf := range cf.VirtualFiles {
				out = append(out, Config{
					Name:      vf.Name,
					DedupFile: resolveRelative(configDir, vf.DedupFile),
					SourceDir: resolveRelative(configDir, vf.SourceDir),
				})
			}
			return nil
		}
		// First on_error_command wins across the whole resolution.
		if errorCmd == nil && cf.OnErrorCommand != nil {
			errorCmd = cf.OnErrorCommand
		}
		if err := validateConfigFields(realPath, cf); err != nil {
			return err
		}
		// Emit the top-level mapping when fully specified.
		if cf.Name != "" && cf.DedupFile != "" && cf.SourceDir != "" {
			out = append(out, Config{
				Name:      cf.Name,
				DedupFile: resolveRelative(configDir, cf.DedupFile),
				SourceDir: resolveRelative(configDir, cf.SourceDir),
			})
		}
		return nil
	}
	err := walkConfig(configPath, seen, visitor)
	return out, errorCmd, err
}
// BatchManifest represents the batch create manifest file format.
// SourceDir, when set, supplies a default source directory for file
// entries that do not specify their own (see ReadBatchManifest).
type BatchManifest struct {
	SourceDir string              `yaml:"source_dir"`
	Files     []BatchManifestFile `yaml:"files"`
}
// BatchManifestFile represents a single file entry in a batch manifest.
// MKV and Output are required; Name defaults to the MKV basename (with a
// ".mkv" extension enforced) and SourceDir falls back to the manifest-level
// default — see ReadBatchManifest for how defaults are applied.
type BatchManifestFile struct {
	MKV       string `yaml:"mkv"`        // Required: input MKV path
	Output    string `yaml:"output"`     // Required: output path
	Name      string `yaml:"name"`       // Optional: display/output name
	SourceDir string `yaml:"source_dir"` // Optional: per-file source directory
}
// ReadBatchManifest reads and validates a batch manifest file.
// Relative paths are resolved against the manifest file's directory.
// Defaults are applied for optional fields: name falls back to the MKV
// basename (with a ".mkv" extension enforced) and source_dir falls back to
// the manifest-level default; a file with neither is an error.
func ReadBatchManifest(manifestPath string) (*BatchManifest, error) {
	raw, err := os.ReadFile(manifestPath)
	if err != nil {
		return nil, fmt.Errorf("read batch manifest: %w", err)
	}
	var m BatchManifest
	if err := yaml.Unmarshal(raw, &m); err != nil {
		return nil, fmt.Errorf("parse batch manifest %s: %w", manifestPath, err)
	}
	if len(m.Files) == 0 {
		return nil, fmt.Errorf("batch manifest %s: files list is empty", manifestPath)
	}
	abs, err := filepath.Abs(manifestPath)
	if err != nil {
		return nil, fmt.Errorf("resolve manifest path: %w", err)
	}
	baseDir := filepath.Dir(abs)
	// Top-level default source_dir: resolve against the manifest dir, clean.
	if m.SourceDir != "" {
		m.SourceDir = filepath.Clean(resolveRelative(baseDir, m.SourceDir))
	}
	for i := range m.Files {
		entry := &m.Files[i]
		if entry.MKV == "" {
			return nil, fmt.Errorf("batch manifest %s: files[%d] missing required 'mkv' field", manifestPath, i)
		}
		entry.MKV = resolveRelative(baseDir, entry.MKV)
		if entry.Output == "" {
			return nil, fmt.Errorf("batch manifest %s: files[%d] missing required 'output' field", manifestPath, i)
		}
		entry.Output = resolveRelative(baseDir, entry.Output)
		// Default the name to the MKV basename, then enforce the extension.
		if entry.Name == "" {
			entry.Name = filepath.Base(entry.MKV)
		}
		if !strings.HasSuffix(strings.ToLower(entry.Name), ".mkv") {
			entry.Name += ".mkv"
		}
		// Per-file source_dir wins; otherwise use the top-level default.
		switch {
		case entry.SourceDir != "":
			entry.SourceDir = filepath.Clean(resolveRelative(baseDir, entry.SourceDir))
		case m.SourceDir != "":
			entry.SourceDir = m.SourceDir
		default:
			return nil, fmt.Errorf("batch manifest %s: files[%d] has no source_dir (set per-file or top-level default)", manifestPath, i)
		}
	}
	return &m, nil
}
// resolveRelative joins path onto baseDir unless path is already absolute,
// in which case it is returned untouched.
func resolveRelative(baseDir, path string) string {
	if !filepath.IsAbs(path) {
		return filepath.Join(baseDir, path)
	}
	return path
}
package dedup
import (
"fmt"
"path/filepath"
"sort"
"github.com/bmatcuk/doublestar/v4"
"gopkg.in/yaml.v3"
)
// resolveIncludePaths reads standard config files and resolves their includes
// glob patterns into a sorted, deduplicated list of absolute file paths.
// It can be used to compute the explicit set of config files that contribute
// mappings from a wildcard-based configuration.
func resolveIncludePaths(configPaths []string) ([]string, error) {
seen := make(map[string]bool)
var files []string
for _, configPath := range configPaths {
err := walkConfig(configPath, seen, func(phase, realPath string, cf *configFile, _ string) error {
if phase != "pre" {
return nil
}
if err := validateConfigFields(realPath, cf); err != nil {
return err
}
// Collect paths of configs that contribute any mappings.
hasDirectMapping := cf.Name != "" && cf.DedupFile != "" && cf.SourceDir != ""
if hasDirectMapping || len(cf.VirtualFiles) > 0 {
files = append(files, realPath)
}
return nil
})
if err != nil {
return nil, err
}
}
sort.Strings(files)
return files, nil
}
// ExpandConfigFile reads a config file, resolves its includes glob patterns
// to explicit paths (single level, no recursion), and returns the expanded
// config as YAML bytes. All other settings (on_error_command, virtual_files,
// top-level name/dedup_file/source_dir) are preserved unchanged. The included
// files themselves are not modified — they can still contain their own globs.
func ExpandConfigFile(configPath string) ([]byte, error) {
	realPath, _, cf, err := openConfigFile(configPath)
	if err != nil {
		return nil, err
	}
	if err := validateConfigFields(realPath, cf); err != nil {
		return nil, err
	}
	// Mirror ResolveConfigs' on_error_command validation so expand-config
	// fails fast on invalid input.
	if cf.OnErrorCommand != nil && len(cf.OnErrorCommand.Command.Args) == 0 {
		return nil, fmt.Errorf("%s: on_error_command.command must not be empty", realPath)
	}
	// No includes: re-marshal the parsed config (rather than echoing raw
	// bytes) for consistent formatting and to avoid accumulating headers
	// when expand-config is re-run on its own output.
	if len(cf.Includes) == 0 {
		return yaml.Marshal(cf)
	}
	configDir := filepath.Dir(realPath)
	dedup := make(map[string]bool)
	var expanded []string
	for _, pattern := range cf.Includes {
		glob := resolveRelative(configDir, pattern)
		matches, err := doublestar.FilepathGlob(glob)
		if err != nil {
			return nil, fmt.Errorf("expand include pattern %q in %s: %w", glob, realPath, err)
		}
		sort.Strings(matches)
		for _, match := range matches {
			abs, err := filepath.Abs(match)
			if err != nil {
				return nil, fmt.Errorf("resolve path %s: %w", match, err)
			}
			// Canonicalize via EvalSymlinks (best effort) so dedup keys match
			// walkConfig's cycle-detection keys.
			canonical := abs
			if resolved, err := filepath.EvalSymlinks(abs); err == nil {
				canonical = resolved
			}
			if dedup[canonical] {
				continue
			}
			dedup[canonical] = true
			expanded = append(expanded, canonical)
		}
	}
	// Swap the glob patterns for the globally-sorted explicit paths.
	sort.Strings(expanded)
	cf.Includes = expanded
	out, err := yaml.Marshal(cf)
	if err != nil {
		return nil, fmt.Errorf("marshal expanded config: %w", err)
	}
	return out, nil
}
// Package dedup provides reading and writing of .mkvdup deduplication files.
package dedup
import (
"encoding/binary"
"github.com/stuckj/mkvdup/internal/matcher"
"github.com/stuckj/mkvdup/internal/source"
)
// File format constants
const (
	Magic   = "MKVDUP01"
	Version = 3 // v3: Source field expanded to uint16 for >256 source files
	// VersionRangeMap is the version for files with embedded range maps.
	// Entries use ES offsets; a range map section maps ES offsets to raw file offsets.
	VersionRangeMap uint32 = 4
	// VersionCreator is V3 with a creator version string after the header.
	VersionCreator uint32 = 5
	// VersionRangeMapCreator is V4 with a creator version string after the header.
	VersionRangeMapCreator uint32 = 6
	// VersionUsed is V7: V5 with a per-source-file Used byte after the checksum.
	VersionUsed uint32 = 7
	// VersionRangeMapUsed is V8: V6 with a per-source-file Used byte after the checksum.
	VersionRangeMapUsed uint32 = 8
	// HeaderSize = Magic(8) + Version(4) + Flags(4) + OriginalSize(8) + OriginalChecksum(8) +
	// SourceType(1) + UsesESOffsets(1) + SourceFileCount(2) + EntryCount(8) +
	// DeltaOffset(8) + DeltaSize(8) = 60 bytes
	HeaderSize = 60
	EntrySize  = 28 // Fixed entry size: 8+8+2+8+1+1 = 28 bytes
	// FooterSize is the V3 footer: IndexChecksum(8) + DeltaChecksum(8) + Magic(8).
	FooterSize   = 24
	FooterV4Size = 32 // V4 footer adds RangeMapChecksum (8 bytes)
	MagicSize    = 8
	VersionSize  = 4
	// MaxCreatorVersionLen caps the creator version string's byte length
	// (writer truncates, reader rejects).
	MaxCreatorVersionLen = 4096
)
// Source types (values stored in Header.SourceType).
const (
	SourceTypeDVD    uint8 = 0
	SourceTypeBluray uint8 = 1
)
// Header represents the fixed header at the start of a .mkvdup file.
// The field sizes sum to HeaderSize (60 bytes); see the HeaderSize
// constant's comment for the on-disk layout.
type Header struct {
	Magic            [8]byte // "MKVDUP01"
	Version          uint32  // File format version
	Flags            uint32  // Reserved for future use
	OriginalSize     int64   // Size of original MKV file
	OriginalChecksum uint64  // xxhash of original MKV file
	SourceType       uint8   // 0=DVD, 1=Blu-ray
	UsesESOffsets    uint8   // 1 if source uses ES offsets (MPEG-PS)
	SourceFileCount  uint16  // Number of source files
	EntryCount       uint64  // Number of index entries
	DeltaOffset      int64   // Offset to delta section
	DeltaSize        int64   // Size of delta section
}
// SourceFile represents a source file entry in the dedup file.
type SourceFile struct {
	RelativePath string // Path relative to source directory
	Size         int64  // File size
	Checksum     uint64 // xxhash of file
	Used         bool   // Whether this source file is referenced by any entry (V7/V8 only)
}
// Entry represents an index entry in the dedup file.
// This mirrors matcher.Entry but is specifically for serialization.
type Entry struct {
MkvOffset int64 // Start offset in the MKV file
Length int64 // Length of this region
Source uint16 // 0 = delta, 1+ = source file index + 1 (supports up to 65535 files)
SourceOffset int64 // Offset in source file (or ES offset)
IsVideo bool // For ES-based sources
AudioSubStreamID byte // For ES-based audio sub-streams
IsLPCM bool // True if 16-bit LPCM audio requiring byte-swap on read
}
// RawEntry matches the 28-byte on-disk entry format exactly.
// Uses byte arrays for int64 fields to handle unaligned access portably.
// This enables direct memory-mapped access without parsing into []Entry.
type RawEntry struct {
MkvOffset [8]byte // int64, little-endian
Length [8]byte // int64, little-endian
Source [2]byte // uint16, little-endian
SourceOffset [8]byte // int64, little-endian (unaligned at byte 18)
ESFlags uint8 // bit 0 = IsVideo
AudioSubStreamID uint8
}
// ESFlags bit layout:
//
// bit 0: IsVideo
// bit 1: IsLPCM (16-bit LPCM requiring byte-swap on read)
// bits 2-7: reserved
// ToEntry converts a RawEntry to an Entry by parsing the byte arrays.
func (r *RawEntry) ToEntry() Entry {
e := Entry{
MkvOffset: int64(binary.LittleEndian.Uint64(r.MkvOffset[:])),
Length: int64(binary.LittleEndian.Uint64(r.Length[:])),
Source: binary.LittleEndian.Uint16(r.Source[:]),
SourceOffset: int64(binary.LittleEndian.Uint64(r.SourceOffset[:])),
IsVideo: r.ESFlags&1 == 1,
AudioSubStreamID: r.AudioSubStreamID,
IsLPCM: r.ESFlags&2 != 0,
}
return e
}
// Footer represents the footer at the end of a .mkvdup file.
// The trailing magic lets readers locate the footer by scanning backward
// from the end of the file.
type Footer struct {
	IndexChecksum    uint64  // xxhash of index section
	DeltaChecksum    uint64  // xxhash of delta section
	RangeMapChecksum uint64  // xxhash of range map section (V4 only; 0 for V3)
	Magic            [8]byte // "MKVDUP01" (for reverse scanning)
}
// File represents a complete dedup file structure for reconstruction.
// Note: Entries are accessed directly from mmap via Reader.getEntry(),
// not stored in this struct, to avoid large memory allocation.
type File struct {
	Header         Header
	SourceFiles    []SourceFile
	DeltaOffset    int64 // Offset to delta section in file
	UsesESOffsets  bool
	CreatorVersion string // Version of mkvdup that created this file (V5+ only)
	headerSize     int64  // Effective header size (60 for V3/V4, 60+2+len for V5-V8)
}
// creatorVersionSize returns the on-disk size of the creator version field:
// 0 when the string is absent, otherwise a 2-byte length prefix plus the
// string bytes.
func creatorVersionSize(v string) int64 {
	if len(v) == 0 {
		return 0
	}
	return int64(len(v)) + 2
}
// ToMatcherEntry converts a dedup Entry to a matcher Entry, copying each
// field one-to-one.
func (e *Entry) ToMatcherEntry() matcher.Entry {
	var m matcher.Entry
	m.MkvOffset = e.MkvOffset
	m.Length = e.Length
	m.Source = e.Source
	m.SourceOffset = e.SourceOffset
	m.IsVideo = e.IsVideo
	m.AudioSubStreamID = e.AudioSubStreamID
	m.IsLPCM = e.IsLPCM
	return m
}
// FromMatcherEntry creates a dedup Entry from a matcher Entry, copying each
// field one-to-one (the inverse of Entry.ToMatcherEntry).
func FromMatcherEntry(e matcher.Entry) Entry {
	var d Entry
	d.MkvOffset = e.MkvOffset
	d.Length = e.Length
	d.Source = e.Source
	d.SourceOffset = e.SourceOffset
	d.IsVideo = e.IsVideo
	d.AudioSubStreamID = e.AudioSubStreamID
	d.IsLPCM = e.IsLPCM
	return d
}
// ToSourceFile converts source.File to dedup SourceFile. The Used flag is
// left at its zero value (false) here.
func ToSourceFile(sf source.File) SourceFile {
	var out SourceFile
	out.RelativePath = sf.RelativePath
	out.Size = sf.Size
	out.Checksum = sf.Checksum
	return out
}
package dedup
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"sort"
"sync"
"github.com/cespare/xxhash/v2"
"github.com/stuckj/mkvdup/internal/mmap"
"github.com/stuckj/mkvdup/internal/source"
)
// Range map constants
const (
	RangeMapMagic = "RNGEMAPX" // 8 bytes, identifies the range map section
	// rangeMapCoarseStep is how many entries per coarse index slot.
	// Binary search the coarse index, then seek within a block.
	rangeMapCoarseStep = 1024
)
// RangeMapStreamHeader identifies a stream within the range map section.
type RangeMapStreamHeader struct {
	FileIndex   uint16 // Source file index (0-based)
	StreamType  uint8  // 0 = video, 1 = audio
	SubStreamID uint8  // For audio: sub-stream ID
	EntryCount  uint32 // Number of range entries for this stream
}
// rangeMapCoarseEntry is one slot in the coarse ESOffset index.
// It snapshots the decoder state at a given entry so that decoding can
// resume from this point without re-reading the compressed stream from
// the start.
type rangeMapCoarseEntry struct {
	esOffset     int64 // ES offset at the start of this entry
	fileOffset   int64 // raw file offset of this entry
	entryIndex   int   // logical entry index
	entrySize    int   // payload size of this entry
	byteOff      int   // byte offset in compressed data for next decode
	rleRemaining int   // default entries remaining after this one in current RLE run
}
// rangeMapCursor tracks position during sequential access through a compressed range map.
type rangeMapCursor struct {
	esOff   int64 // ES offset at the start of the current entry
	fileOff int64 // raw file offset of the current entry
	size    int   // payload size of the current entry
	rleRem  int   // default entries remaining after this one in the current RLE run
	pos     int   // position in compressed data
}
// StreamRangeMap provides random access to a stream's range map using
// compressed delta+varint+RLE encoded data and a coarse in-memory ESOffset index.
type StreamRangeMap struct {
	compressedData []byte                // compressed range data (zero-copy slice from mmap)
	entryCount     int                   // total number of range entries
	defaultGap     int64                 // RLE default: inter-entry file gap
	defaultSize    int                   // RLE default: entry payload size
	coarse         []rangeMapCoarseEntry // coarse ESOffset index for binary search
	totalSize      int64                 // total ES size (sum of all entry sizes)
	// Sequential read cursor cache — avoids redundant binary search + seeking
	// for reads at or near the previous position (common in FUSE sequential reads).
	// Protected by cursorMu for concurrent FUSE read safety.
	cursorMu          sync.Mutex
	cachedCursor      rangeMapCursor
	cachedCursorValid bool
}
// TotalESSize returns the total size of the elementary stream
// (the sum of all entry payload sizes).
func (sm *StreamRangeMap) TotalESSize() int64 {
	return sm.totalSize
}
// --- Varint / Zigzag helpers ---
func zigzagEncode(v int64) uint64 {
return uint64((v << 1) ^ (v >> 63))
}
func zigzagDecode(v uint64) int64 {
return int64(v>>1) ^ -int64(v&1)
}
// --- Compressed encoding ---

// findDefaultsSampleSize is the maximum number of entries to examine when
// determining the most common gap and size. For typical media streams the
// pattern is consistent throughout, so a small sample is sufficient and
// avoids O(N) map operations on streams with hundreds of millions of entries.
const findDefaultsSampleSize = 10000

// findDefaults finds the most common gap and size in a range sequence.
// Uses sampling for large inputs to avoid expensive map operations.
//
// Ties are broken toward the smaller value so the result (and therefore the
// encoded range map bytes) is deterministic: Go map iteration order is
// randomized, so a plain "first maximum wins" scan could pick different
// defaults on different runs for identical input.
//
// Returns (0, 0) if ranges are too small or values don't fit in uint16.
func findDefaults(ranges []source.PESPayloadRange) (defaultGap int64, defaultSize int) {
	if len(ranges) < 2 {
		if len(ranges) == 1 {
			return 0, ranges[0].Size
		}
		return 0, 0
	}
	// Sample a prefix — patterns in PES streams are consistent throughout.
	sampleLen := len(ranges)
	if sampleLen > findDefaultsSampleSize {
		sampleLen = findDefaultsSampleSize
	}
	// Count gap frequencies (sample only)
	gapCounts := make(map[int64]int)
	for i := 1; i < sampleLen; i++ {
		prevEnd := ranges[i-1].FileOffset + int64(ranges[i-1].Size)
		gapCounts[ranges[i].FileOffset-prevEnd]++
	}
	var bestGap int64
	bestGapCount := 0
	for gap, count := range gapCounts {
		// Deterministic tie-break: prefer the smaller gap.
		if count > bestGapCount || (count == bestGapCount && gap < bestGap) {
			bestGap = gap
			bestGapCount = count
		}
	}
	// Count size frequencies (sample only)
	sizeCounts := make(map[int]int)
	for i := 0; i < sampleLen; i++ {
		sizeCounts[ranges[i].Size]++
	}
	var bestSize int
	bestSizeCount := 0
	for size, count := range sizeCounts {
		// Deterministic tie-break: prefer the smaller size.
		if count > bestSizeCount || (count == bestSizeCount && size < bestSize) {
			bestSize = size
			bestSizeCount = count
		}
	}
	// Clamp to uint16 range for on-disk storage; disable RLE if out of range
	if bestGap < 0 || bestGap > 65535 || bestSize > 65535 {
		return 0, 0
	}
	return bestGap, bestSize
}
// encodeCompressedRanges encodes PES payload ranges using delta+varint+RLE.
//
// Format:
//   - First entry: fileOffset (uvarint) + size (uvarint)
//   - Subsequent entries:
//   - 0x00 + count (uvarint): RLE run of count default entries
//   - (zigzag(delta)+1) (uvarint) + size (uvarint): explicit entry
//
// The +1 shift ensures explicit entries never start with 0x00.
// offsetFunc, if non-nil, converts parser-relative FileOffset values to
// source-file-relative offsets (e.g., adding ISO base offset). The encoded
// data always stores converted offsets.
func encodeCompressedRanges(ranges []source.PESPayloadRange, defaultGap int64, defaultSize int, offsetFunc func(int64) int64) []byte {
	if len(ranges) == 0 {
		return nil
	}
	// Direct []byte append (rather than bytes.Buffer) keeps the hot loop
	// cheap; RLE makes the typical output tiny, so 256 bytes is generous.
	out := make([]byte, 0, 256)
	var scratch [binary.MaxVarintLen64]byte
	putUvarint := func(v uint64) {
		n := binary.PutUvarint(scratch[:], v)
		out = append(out, scratch[:n]...)
	}
	mapOffset := func(off int64) int64 {
		if offsetFunc != nil {
			return offsetFunc(off)
		}
		return off
	}
	// First entry is always explicit: absolute offset + size.
	prevOff := mapOffset(ranges[0].FileOffset)
	putUvarint(uint64(prevOff))
	putUvarint(uint64(ranges[0].Size))
	flushRLE := func(count int) {
		out = append(out, 0x00)
		putUvarint(uint64(count))
	}
	pending := 0 // default entries accumulated in the current RLE run
	for i := 1; i < len(ranges); i++ {
		curOff := mapOffset(ranges[i].FileOffset)
		prevEnd := prevOff + int64(ranges[i-1].Size)
		if curOff-prevEnd == defaultGap && ranges[i].Size == defaultSize {
			pending++
		} else {
			if pending > 0 {
				flushRLE(pending)
				pending = 0
			}
			// Explicit entry: zigzag delta from the predicted offset, +1 so
			// the first byte can never be the 0x00 RLE marker.
			delta := curOff - (prevEnd + defaultGap)
			putUvarint(zigzagEncode(delta) + 1)
			putUvarint(uint64(ranges[i].Size))
		}
		prevOff = curOff
	}
	if pending > 0 {
		flushRLE(pending)
	}
	return out
}
// --- Compressed decoding ---
// buildStreamRangeMap creates a StreamRangeMap from compressed data.
// It decodes the entire stream once to build a coarse ESOffset index
// (one snapshot every rangeMapCoarseStep entries) so later seeks can
// binary search instead of decoding from the start of the stream.
func buildStreamRangeMap(compressedData []byte, entryCount int, defaultGap int64, defaultSize int) (*StreamRangeMap, error) {
	if entryCount == 0 {
		// Empty stream: a valid map with no entries and no coarse index.
		return &StreamRangeMap{entryCount: 0}, nil
	}
	sm := &StreamRangeMap{
		compressedData: compressedData,
		entryCount:     entryCount,
		defaultGap:     defaultGap,
		defaultSize:    defaultSize,
	}
	// Build coarse index by iterating through all entries
	coarseCount := (entryCount + rangeMapCoarseStep - 1) / rangeMapCoarseStep
	sm.coarse = make([]rangeMapCoarseEntry, 0, coarseCount)
	// Decode first entry (always explicit: fileOffset uvarint + size uvarint)
	pos := 0
	fo, n := binary.Uvarint(compressedData[pos:])
	if n <= 0 {
		return nil, fmt.Errorf("truncated first entry fileOffset")
	}
	pos += n
	sz, n := binary.Uvarint(compressedData[pos:])
	if n <= 0 {
		return nil, fmt.Errorf("truncated first entry size")
	}
	pos += n
	// Decoder state: ES offset, raw file offset, and payload size of the
	// current entry, plus default entries left in the active RLE run.
	var esOff int64
	fileOff := int64(fo)
	entSize := int(sz)
	rleRem := 0
	// Record coarse entry for entry 0
	sm.coarse = append(sm.coarse, rangeMapCoarseEntry{
		esOffset: 0, fileOffset: fileOff, entryIndex: 0, entrySize: entSize,
		byteOff: pos, rleRemaining: 0,
	})
	// Iterate through entries 1..entryCount-1
	for i := 1; i < entryCount; i++ {
		prevEnd := fileOff + int64(entSize)
		esOff += int64(entSize)
		if rleRem > 0 {
			// Still in RLE run: next entry is a default entry, nothing to decode
			fileOff = prevEnd + defaultGap
			entSize = defaultSize
			rleRem--
		} else if pos < len(compressedData) && compressedData[pos] == 0x00 {
			// RLE token: 0x00 marker followed by the run length (uvarint)
			pos++
			count, n := binary.Uvarint(compressedData[pos:])
			if n <= 0 {
				return nil, fmt.Errorf("truncated RLE count at entry %d", i)
			}
			pos += n
			fileOff = prevEnd + defaultGap
			entSize = defaultSize
			// This entry consumes one of the run; the rest follow.
			rleRem = int(count) - 1
		} else if pos < len(compressedData) {
			// Explicit entry: (zigzag(delta)+1) uvarint + size uvarint, where
			// delta is relative to the default-gap prediction prevEnd+defaultGap
			encoded, n := binary.Uvarint(compressedData[pos:])
			if n <= 0 {
				return nil, fmt.Errorf("truncated explicit delta at entry %d", i)
			}
			pos += n
			szv, n := binary.Uvarint(compressedData[pos:])
			if n <= 0 {
				return nil, fmt.Errorf("truncated explicit size at entry %d", i)
			}
			pos += n
			delta := zigzagDecode(encoded - 1)
			fileOff = prevEnd + defaultGap + delta
			entSize = int(szv)
			rleRem = 0
		} else {
			return nil, fmt.Errorf("unexpected end of compressed data at entry %d", i)
		}
		// Snapshot the decoder state every rangeMapCoarseStep entries so
		// seeks can resume decoding from the nearest preceding slot.
		if i%rangeMapCoarseStep == 0 {
			sm.coarse = append(sm.coarse, rangeMapCoarseEntry{
				esOffset: esOff, fileOffset: fileOff, entryIndex: i, entrySize: entSize,
				byteOff: pos, rleRemaining: rleRem,
			})
		}
	}
	// Total ES size = start of the last entry plus its payload size.
	sm.totalSize = esOff + int64(entSize)
	return sm, nil
}
// advanceCursor moves the cursor forward by one entry, decoding the next
// RLE token or explicit entry from the compressed stream when needed.
func (sm *StreamRangeMap) advanceCursor(c *rangeMapCursor) error {
	// End of the current entry, in both address spaces.
	prevEnd := c.fileOff + int64(c.size)
	c.esOff += int64(c.size)
	// Inside an RLE run: the next entry is a default entry, nothing to decode.
	if c.rleRem > 0 {
		c.fileOff = prevEnd + sm.defaultGap
		c.size = sm.defaultSize
		c.rleRem--
		return nil
	}
	if c.pos >= len(sm.compressedData) {
		return fmt.Errorf("unexpected end of compressed data")
	}
	if sm.compressedData[c.pos] != 0x00 {
		// Explicit entry: (zigzag(delta)+1) uvarint followed by size uvarint.
		encoded, n := binary.Uvarint(sm.compressedData[c.pos:])
		if n <= 0 {
			return fmt.Errorf("truncated explicit delta")
		}
		c.pos += n
		sz, n := binary.Uvarint(sm.compressedData[c.pos:])
		if n <= 0 {
			return fmt.Errorf("truncated explicit size")
		}
		c.pos += n
		c.fileOff = prevEnd + sm.defaultGap + zigzagDecode(encoded-1)
		c.size = int(sz)
		c.rleRem = 0
		return nil
	}
	// 0x00 marker: start of an RLE run of default entries.
	c.pos++
	count, n := binary.Uvarint(sm.compressedData[c.pos:])
	if n <= 0 {
		return fmt.Errorf("truncated RLE count")
	}
	c.pos += n
	c.fileOff = prevEnd + sm.defaultGap
	c.size = sm.defaultSize
	// This entry consumes one of the run; the remainder follow.
	c.rleRem = int(count) - 1
	return nil
}
// skipForwardTo advances cur entry-by-entry until it reaches the entry
// containing esOffset, using O(1) arithmetic to jump within RLE runs of
// default-sized entries instead of decoding them one at a time.
//
// This helper replaces two previously duplicated copies of the same loop
// (the cached-cursor fast path and the coarse-index slow path). The original
// "target inside run" and "target past run" branches collapse into one:
// clamping k to the run length makes the past-run case (k = rleRem,
// rleRem -> 0) fall out of the same arithmetic.
func (sm *StreamRangeMap) skipForwardTo(cur *rangeMapCursor, esOffset int64) error {
	for cur.esOff+int64(cur.size) <= esOffset {
		// RLE fast path: jump within a run of default-sized entries.
		if cur.rleRem > 0 && sm.defaultSize > 0 {
			afterCurrent := cur.esOff + int64(cur.size)
			// k = run entries to skip (1-based), clamped to the run length.
			// If the target lies beyond the run this lands on its last entry
			// and the loop falls through to advanceCursor below.
			k := int((esOffset-afterCurrent)/int64(sm.defaultSize)) + 1
			if k > cur.rleRem {
				k = cur.rleRem
			}
			stride := int64(sm.defaultSize) + sm.defaultGap
			// k-1 positions relative to afterCurrent (start of run entry 1).
			cur.esOff = afterCurrent + int64(k-1)*int64(sm.defaultSize)
			cur.fileOff = cur.fileOff + int64(cur.size) + sm.defaultGap + int64(k-1)*stride
			cur.size = sm.defaultSize
			cur.rleRem -= k
			continue
		}
		if err := sm.advanceCursor(cur); err != nil {
			return fmt.Errorf("seek to ES offset %d: %w", esOffset, err)
		}
	}
	return nil
}

// seekTo positions a cursor at the entry containing esOffset.
// Uses the cached cursor for O(1) sequential reads, falling back to
// coarse index binary search + forward scan for random access.
func (sm *StreamRangeMap) seekTo(esOffset int64) (rangeMapCursor, error) {
	// Snapshot the cached cursor under the lock for a consistent view.
	sm.cursorMu.Lock()
	cc := sm.cachedCursor
	cachedValid := sm.cachedCursorValid
	sm.cursorMu.Unlock()
	if cachedValid {
		curEnd := cc.esOff + int64(cc.size)
		if esOffset >= cc.esOff && esOffset < curEnd {
			// Target is within the cached entry — use it directly.
			return cc, nil
		}
		if esOffset >= curEnd {
			// Target is ahead of the cached cursor — scan forward only when
			// it is "close" (within ~2 coarse blocks); otherwise the coarse
			// index below is cheaper.
			maxForwardSeek := int64(rangeMapCoarseStep*2) * int64(sm.defaultSize+1)
			if maxForwardSeek > 0 && esOffset-curEnd < maxForwardSeek {
				cur := cc
				if err := sm.skipForwardTo(&cur, esOffset); err != nil {
					return rangeMapCursor{}, err
				}
				return cur, nil
			}
		}
	}
	// Slow path: binary search the coarse index for the last slot starting
	// at or before esOffset, then scan forward from its snapshot.
	blockIdx := sort.Search(len(sm.coarse), func(i int) bool {
		return sm.coarse[i].esOffset > esOffset
	}) - 1
	if blockIdx < 0 {
		blockIdx = 0
	}
	ce := &sm.coarse[blockIdx]
	cur := rangeMapCursor{
		esOff:   ce.esOffset,
		fileOff: ce.fileOffset,
		size:    ce.entrySize,
		rleRem:  ce.rleRemaining,
		pos:     ce.byteOff,
	}
	if err := sm.skipForwardTo(&cur, esOffset); err != nil {
		return rangeMapCursor{}, err
	}
	return cur, nil
}
// ReadData reads size bytes of ES data starting at esOffset, copying into a
// newly allocated buffer. sourceData is the raw source file contents
// (sourceSize is its length); all reads are bounds-checked against it.
// Uses the coarse index for fast binary search, RLE arithmetic for fast seeking.
func (sm *StreamRangeMap) ReadData(sourceData []byte, sourceSize int64, esOffset int64, size int) ([]byte, error) {
	if sm.entryCount == 0 {
		return nil, fmt.Errorf("empty range map")
	}
	cur, err := sm.seekTo(esOffset)
	if err != nil {
		return nil, err
	}
	// Read data, potentially spanning multiple entries
	result := make([]byte, 0, size)
	remaining := size
	for remaining > 0 {
		// Position of the read pointer within the current entry.
		offsetInEntry := esOffset - cur.esOff
		if offsetInEntry < 0 {
			return nil, fmt.Errorf("ES offset gap at ES %d", cur.esOff)
		}
		// Copy as much as the current entry provides, capped by remaining.
		available := int64(cur.size) - offsetInEntry
		toRead := int64(remaining)
		if toRead > available {
			toRead = available
		}
		srcStart := cur.fileOff + offsetInEntry
		srcEnd := srcStart + toRead
		if srcEnd > sourceSize {
			return nil, fmt.Errorf("source read out of bounds: %d + %d > %d", srcStart, toRead, sourceSize)
		}
		result = append(result, sourceData[srcStart:srcEnd]...)
		remaining -= int(toRead)
		esOffset += toRead
		if remaining > 0 {
			// RLE batch path: batch-copy full entries using stride arithmetic.
			if cur.rleRem > 0 {
				// Advance to the next RLE entry (equivalent to one advanceCursor).
				cur.esOff += int64(cur.size)
				cur.fileOff += int64(cur.size) + sm.defaultGap
				cur.size = sm.defaultSize
				cur.rleRem--
				stride := int64(sm.defaultSize) + sm.defaultGap
				defSz := sm.defaultSize
				defSz64 := int64(defSz)
				// Calculate how many full entries we can batch-copy:
				// bounded by the bytes still wanted and by the run length
				// (the current entry plus cur.rleRem following defaults).
				batchCount := remaining / defSz
				if maxRLE := cur.rleRem + 1; batchCount > maxRLE {
					batchCount = maxRLE
				}
				if batchCount > 0 {
					// Bounds check the whole batch up front.
					lastSrcEnd := cur.fileOff + int64(batchCount-1)*stride + defSz64
					if lastSrcEnd > sourceSize {
						return nil, fmt.Errorf("source read out of bounds: %d > %d",
							lastSrcEnd, sourceSize)
					}
					// Extend result within its preallocated capacity (cap is
					// size and total bytes appended never exceed size).
					off := len(result)
					result = result[:off+batchCount*defSz]
					stridedCopy(
						result[off:off+batchCount*defSz],
						sourceData[cur.fileOff:lastSrcEnd],
						batchCount, defSz, int(stride),
					)
					copied := batchCount * defSz
					remaining -= copied
					esOffset += int64(copied)
					// Position cursor at the last copied entry.
					if batchCount > 1 {
						advance := batchCount - 1
						cur.esOff += int64(advance) * defSz64
						cur.fileOff += int64(advance) * stride
						cur.rleRem -= advance
					}
				}
				continue
			}
			if err := sm.advanceCursor(&cur); err != nil {
				return nil, fmt.Errorf("read spanning entries: %w", err)
			}
		}
	}
	// Update cached cursor for next sequential read
	sm.cursorMu.Lock()
	sm.cachedCursor = cur
	sm.cachedCursorValid = true
	sm.cursorMu.Unlock()
	return result, nil
}
// ReadDataInto reads ES data at the given offset directly into dest, avoiding allocation.
// Returns the number of bytes written. Uses cached cursor for sequential reads.
//
// The source parameter provides read access to the source file. If source
// implements MmapData, the mmap'd byte slice is used for zero-copy reads.
// Otherwise, source.ReadAt is used (pread path for network filesystems).
//
// A read may span multiple range-map entries: each iteration copies the
// overlap with the current entry, then advances the cursor. When the cursor
// sits inside an RLE run of default-sized entries, whole entries are
// batch-copied with a single strided copy instead of one copy per entry.
func (sm *StreamRangeMap) ReadDataInto(source mmap.SourceFile, esOffset int64, dest []byte) (int, error) {
	if sm.entryCount == 0 {
		return 0, fmt.Errorf("empty range map")
	}
	sourceSize := source.Size()
	// Resolve mmap data once for the zero-copy fast path.
	var sourceData []byte
	if md, ok := source.(mmap.MmapData); ok {
		sourceData = md.Data()
	}
	// Position a cursor at the entry containing esOffset (seekTo reuses the
	// cached cursor when the read continues a previous one).
	cur, err := sm.seekTo(esOffset)
	if err != nil {
		return 0, err
	}
	// Read data directly into dest, potentially spanning multiple entries
	written := 0
	remaining := len(dest)
	for remaining > 0 {
		offsetInEntry := esOffset - cur.esOff
		if offsetInEntry < 0 {
			// Requested offset falls before the current entry: the map has a
			// gap in its ES offset coverage.
			return written, fmt.Errorf("ES offset gap at ES %d", cur.esOff)
		}
		// Copy at most the bytes left in the current entry.
		available := int64(cur.size) - offsetInEntry
		toRead := int64(remaining)
		if toRead > available {
			toRead = available
		}
		srcStart := cur.fileOff + offsetInEntry
		srcEnd := srcStart + toRead
		if srcEnd > sourceSize {
			return written, fmt.Errorf("source read out of bounds: %d + %d > %d", srcStart, toRead, sourceSize)
		}
		if sourceData != nil {
			// Zero-copy path: copy straight out of the mmap'd region.
			copy(dest[written:], sourceData[srcStart:srcEnd])
		} else {
			// Pread path. io.EOF is tolerated only when the read was complete.
			if n, err := source.ReadAt(dest[written:written+int(toRead)], srcStart); err != nil && !(n == int(toRead) && err == io.EOF) {
				return written, fmt.Errorf("pread at %d: %w", srcStart, err)
			}
		}
		written += int(toRead)
		remaining -= int(toRead)
		esOffset += toRead
		if remaining > 0 {
			// RLE batch path: when the next entries are in an RLE run,
			// batch-copy full entries using a single strided copy instead of
			// calling copy()/advanceCursor per entry.
			if cur.rleRem > 0 {
				// Advance to next RLE entry (equivalent to one advanceCursor)
				cur.esOff += int64(cur.size)
				cur.fileOff += int64(cur.size) + sm.defaultGap
				cur.size = sm.defaultSize
				cur.rleRem--
				stride := int64(sm.defaultSize) + sm.defaultGap
				defSz := sm.defaultSize
				defSz64 := int64(defSz)
				// Calculate how many full entries we can batch-copy: limited
				// both by the bytes still wanted and by the entries left in
				// this RLE run (current entry included, hence rleRem+1).
				batchCount := remaining / defSz
				if maxRLE := cur.rleRem + 1; batchCount > maxRLE {
					batchCount = maxRLE
				}
				if batchCount > 0 {
					// Bounds check the entire batch
					lastSrcEnd := cur.fileOff + int64(batchCount-1)*stride + defSz64
					if lastSrcEnd > sourceSize {
						return written, fmt.Errorf("source read out of bounds: %d > %d",
							lastSrcEnd, sourceSize)
					}
					if sourceData != nil {
						stridedCopy(
							dest[written:written+batchCount*defSz],
							sourceData[cur.fileOff:lastSrcEnd],
							batchCount, defSz, int(stride),
						)
					} else {
						// Pread path: read the contiguous source region into a
						// temp buffer, then strided-copy into dest.
						// tmpSize is bounded by ~len(dest) * stride/defSz, which
						// for Blu-ray M2TS (192/188) is ≈1.02× the dest buffer.
						// Since dest comes from a FUSE read (typically 128KB, max
						// ~1MB), this allocation is small and short-lived. If
						// profiling shows GC pressure, consider a sync.Pool here.
						tmpSize := int(lastSrcEnd - cur.fileOff)
						tmp := make([]byte, tmpSize)
						if n, err := source.ReadAt(tmp, cur.fileOff); err != nil && !(n == tmpSize && err == io.EOF) {
							return written, fmt.Errorf("pread batch at %d: %w", cur.fileOff, err)
						}
						stridedCopy(
							dest[written:written+batchCount*defSz],
							tmp,
							batchCount, defSz, int(stride),
						)
					}
					copied := batchCount * defSz
					written += copied
					remaining -= copied
					esOffset += int64(copied)
					// Position cursor at the last copied entry
					if batchCount > 1 {
						advance := batchCount - 1
						cur.esOff += int64(advance) * defSz64
						cur.fileOff += int64(advance) * stride
						cur.rleRem -= advance
					}
				}
				continue
			}
			if err := sm.advanceCursor(&cur); err != nil {
				return written, fmt.Errorf("read spanning entries: %w", err)
			}
		}
	}
	// Update cached cursor for next sequential read
	sm.cursorMu.Lock()
	sm.cachedCursor = cur
	sm.cachedCursorValid = true
	sm.cursorMu.Unlock()
	return written, nil
}
// --- Deserialization (for Reader) ---
// SourceRangeMaps holds parsed range maps for one source file.
type SourceRangeMaps struct {
	FileIndex uint16                       // index of the source file within the dedup file's source table
	VideoMap  *StreamRangeMap              // range map for the video stream; nil if the source has none
	AudioMaps map[byte]*StreamRangeMap     // keyed by sub-stream ID
}
// readRangeMapSection parses the range map section from mmap'd data.
// The data slice should point to the start of the range map section.
// Compressed data is zero-copy sliced from the input.
//
// Wire layout (all little-endian): magic (8 bytes), source count (uint16),
// then per source: FileIndex (uint16), stream count (uint8), followed by one
// record per stream: 8-byte stream header, 8 bytes of compression parameters,
// then the compressed range data. Every read is bounds-checked against
// len(data) before advancing, so truncated input yields an error rather than
// a panic.
func readRangeMapSection(data []byte) ([]SourceRangeMaps, error) {
	if len(data) < 10 { // magic (8) + source count (2)
		return nil, fmt.Errorf("range map section too small: %d bytes", len(data))
	}
	// Verify magic
	if string(data[:8]) != RangeMapMagic {
		return nil, fmt.Errorf("invalid range map magic: %q", data[:8])
	}
	off := 8
	// Source count
	sourceCount := int(binary.LittleEndian.Uint16(data[off : off+2]))
	off += 2
	result := make([]SourceRangeMaps, 0, sourceCount)
	for s := 0; s < sourceCount; s++ {
		if off+3 > len(data) { // FileIndex (2) + StreamCount (1)
			return nil, fmt.Errorf("truncated range map at source %d", s)
		}
		fileIndex := binary.LittleEndian.Uint16(data[off : off+2])
		off += 2
		streamCount := int(data[off])
		off++
		src := SourceRangeMaps{
			FileIndex: fileIndex,
			AudioMaps: make(map[byte]*StreamRangeMap),
		}
		for st := 0; st < streamCount; st++ {
			if off+8 > len(data) { // StreamHeader size
				return nil, fmt.Errorf("truncated stream header at source %d stream %d", s, st)
			}
			// Parse stream header
			var hdr RangeMapStreamHeader
			_ = binary.LittleEndian.Uint16(data[off : off+2]) // per-stream FileIndex (already tracked per source)
			hdr.StreamType = data[off+2]
			hdr.SubStreamID = data[off+3]
			hdr.EntryCount = binary.LittleEndian.Uint32(data[off+4 : off+8])
			off += 8
			// Read compression parameters
			if off+8 > len(data) { // DefaultGap(2) + DefaultSize(2) + CompressedDataSize(4)
				return nil, fmt.Errorf("truncated compression params at source %d stream %d", s, st)
			}
			defGap := int64(binary.LittleEndian.Uint16(data[off : off+2]))
			off += 2
			defSize := int(binary.LittleEndian.Uint16(data[off : off+2]))
			off += 2
			compSize := int(binary.LittleEndian.Uint32(data[off : off+4]))
			off += 4
			if off+compSize > len(data) {
				return nil, fmt.Errorf("truncated compressed data at source %d stream %d: need %d bytes at offset %d, have %d total",
					s, st, compSize, off, len(data))
			}
			// Zero-copy slice into mmap'd data
			compData := data[off : off+compSize]
			off += compSize
			sm, err := buildStreamRangeMap(compData, int(hdr.EntryCount), defGap, defSize)
			if err != nil {
				return nil, fmt.Errorf("build range map for source %d stream %d: %w", s, st, err)
			}
			// StreamType 0 is video; anything else is an audio sub-stream.
			if hdr.StreamType == 0 {
				src.VideoMap = sm
			} else {
				src.AudioMaps[hdr.SubStreamID] = sm
			}
		}
		result = append(result, src)
	}
	return result, nil
}
// --- Serialization (for Writer) ---
// RangeMapData holds the range map data for all streams of one source file,
// ready for serialization into the dedup file.
type RangeMapData struct {
	FileIndex    uint16                   // index of the source file within the dedup file's source table
	VideoRanges  []source.PESPayloadRange // video stream payload ranges; empty if none
	AudioStreams []AudioRangeData         // one element per audio sub-stream
	OffsetFunc   func(int64) int64        // optional: converts parser-relative to source-file-relative FileOffset
}
// AudioRangeData holds range data for one audio sub-stream.
type AudioRangeData struct {
	SubStreamID byte                     // identifies the audio sub-stream within the source
	Ranges      []source.PESPayloadRange // payload ranges for this sub-stream
}
// encodeRangeMapSection serializes every source's stream range maps into a
// single byte buffer. Building the full section in memory first lets the
// writer know its exact size and compute the checksum before writing.
// Layout: magic (8 bytes), source count (uint16 LE), then per-source records
// (FileIndex, stream count, followed by each compressed stream).
func encodeRangeMapSection(rangeMaps []RangeMapData) ([]byte, error) {
	var out bytes.Buffer
	var scratch [8]byte
	// Section magic followed by the number of source files.
	out.Write([]byte(RangeMapMagic))
	binary.LittleEndian.PutUint16(scratch[:2], uint16(len(rangeMaps)))
	out.Write(scratch[:2])
	for _, rm := range rangeMaps {
		// A source contributes one video stream (if present) plus all of its
		// audio sub-streams.
		count := uint8(len(rm.AudioStreams))
		if len(rm.VideoRanges) > 0 {
			count++
		}
		binary.LittleEndian.PutUint16(scratch[:2], rm.FileIndex)
		out.Write(scratch[:2])
		out.WriteByte(count)
		// Video stream (StreamType 0) comes first, then audio (StreamType 1).
		if len(rm.VideoRanges) > 0 {
			writeCompressedStream(&out, rm.FileIndex, 0, 0, rm.VideoRanges, rm.OffsetFunc)
		}
		for _, a := range rm.AudioStreams {
			writeCompressedStream(&out, rm.FileIndex, 1, a.SubStreamID, a.Ranges, rm.OffsetFunc)
		}
	}
	return out.Bytes(), nil
}
// writeCompressedStream appends one stream's header, compression parameters,
// and compressed range data to buf. offsetFunc, when non-nil, rewrites
// parser-relative FileOffset values into source-file-relative offsets as the
// ranges are encoded.
func writeCompressedStream(buf *bytes.Buffer, fileIndex uint16, streamType uint8, subStreamID byte, ranges []source.PESPayloadRange, offsetFunc func(int64) int64) {
	var scratch [16]byte
	// Stream header: FileIndex(2) + StreamType(1) + SubStreamID(1) + EntryCount(4) = 8 bytes.
	binary.LittleEndian.PutUint16(scratch[0:2], fileIndex)
	scratch[2] = streamType
	scratch[3] = subStreamID
	binary.LittleEndian.PutUint32(scratch[4:8], uint32(len(ranges)))
	buf.Write(scratch[:8])
	// Mode gap/size defaults drive the RLE compression. They are computed on
	// parser-relative offsets — gaps are the same in both domains for the
	// common non-boundary case, which dominates the mode calculation.
	defGap, defSize := findDefaults(ranges)
	// Encode compressed ranges (applies offsetFunc during encoding).
	compressed := encodeCompressedRanges(ranges, defGap, defSize, offsetFunc)
	// Compression parameters: DefaultGap(2) + DefaultSize(2) + CompressedDataSize(4) = 8 bytes.
	binary.LittleEndian.PutUint16(scratch[0:2], uint16(defGap))
	binary.LittleEndian.PutUint16(scratch[2:4], uint16(defSize))
	binary.LittleEndian.PutUint32(scratch[4:8], uint32(len(compressed)))
	buf.Write(scratch[:8])
	buf.Write(compressed)
}
// writeRangeMapSection writes a pre-encoded range map buffer to w and returns
// the xxhash64 checksum of that buffer. The writer stores the checksum so
// readers can detect corruption of the range map section.
func writeRangeMapSection(w io.Writer, rangeMapBuf []byte) (uint64, error) {
	sum := xxhash.Sum64(rangeMapBuf)
	if _, err := w.Write(rangeMapBuf); err != nil {
		return 0, err
	}
	return sum, nil
}
package dedup
import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"sort"
	"sync"
	"time"

	"github.com/cespare/xxhash/v2"
	"github.com/stuckj/mkvdup/internal/mmap"
	"github.com/stuckj/mkvdup/internal/security"
	"github.com/stuckj/mkvdup/internal/source"
	"golang.org/x/sys/unix"
)
// blockSize is the block size for the block index.
// Each block maps an MKV offset range to an entry index for O(1) lookup.
// 64KB balances memory overhead vs scan distance.
const blockSize = 64 * 1024
// Reader reads .mkvdup files and provides data reconstruction.
// Reader is safe for concurrent use from multiple goroutines.
type Reader struct {
	file        *File             // parsed header + source file table
	dedupMmap   *mmap.File        // mmap of the dedup file itself (entries, delta, range maps)
	dedupPath   string            // path the dedup file was opened from
	sourceDir   string            // root directory that source paths are confined to
	sourceFiles []mmap.SourceFile // opened source files, indexed to match file.SourceFiles
	esReader    ESReader          // For ES-based sources (v1 only, deprecated in v2)
	entriesOnce sync.Once         // For lazy entry access initialization
	entriesErr  error             // Error from entry access initialization
	// Direct mmap access to entries (no []Entry allocation)
	indexStart int64 // Byte offset where entries begin in file
	entryCount int   // Number of entries
	// Block index for fast entry lookup on cache miss.
	// Maps block_number (MKV offset / blockSize) → entry index for O(1)
	// narrowing, followed by bounded binary search within the block range.
	// Built once in initEntryAccess; immutable after that (no mutex needed).
	blockIndex []int
	// Last-entry cache for O(1) sequential read lookup
	// Protected by cacheMu for concurrent access safety
	cacheMu        sync.Mutex
	lastEntryIdx   int   // Index of last accessed entry (-1 if none)
	lastEntry      Entry // The cached parsed entry
	lastEntryValid bool  // Whether lastEntry is valid
	// V4 range map data (maps ES offsets to raw file offsets)
	rangeMapsByFile map[int]*SourceRangeMaps // file index -> range maps
}
// ESReader interface for reading ES data from MPEG-PS sources.
// Implementations resolve elementary-stream offsets to raw bytes; used only
// for v1 dedup files (UsesESOffsets), deprecated in later versions.
type ESReader interface {
	// ReadESData reads size bytes at esOffset from the video or audio stream.
	ReadESData(esOffset int64, size int, isVideo bool) ([]byte, error)
	// ReadAudioSubStreamData reads size bytes at esOffset from one audio sub-stream.
	ReadAudioSubStreamData(subStreamID byte, esOffset int64, size int) ([]byte, error)
}
// NewReader opens a dedup file for reading with entry access initialized
// eagerly, so the first ReadAt does no extra setup work. Prefer NewReaderLazy
// when mounting many files and startup latency matters more.
func NewReader(dedupPath, sourceDir string) (*Reader, error) {
	rdr, err := NewReaderLazy(dedupPath, sourceDir)
	if err != nil {
		return nil, err
	}
	// Run the (normally lazy) entry-access setup now so errors surface here
	// instead of on the first read.
	if initErr := rdr.initEntryAccess(); initErr != nil {
		rdr.Close()
		return nil, fmt.Errorf("init entry access: %w", initErr)
	}
	return rdr, nil
}
// NewReaderLazy opens a dedup file, parses only its header, and memory-maps
// the whole file. Index entries are loaded lazily on first read, which keeps
// mount times fast when many files are opened.
func NewReaderLazy(dedupPath, sourceDir string) (*Reader, error) {
	fh, err := os.Open(dedupPath)
	if err != nil {
		return nil, fmt.Errorf("open dedup file: %w", err)
	}
	defer fh.Close()
	parsed, err := parseHeaderOnly(fh)
	if err != nil {
		return nil, fmt.Errorf("parse dedup header: %w", err)
	}
	// The mmap provides zero-copy access to the entry index and delta section.
	m, err := mmap.Open(dedupPath)
	if err != nil {
		return nil, fmt.Errorf("mmap dedup file: %w", err)
	}
	rdr := &Reader{
		file:         parsed,
		dedupMmap:    m,
		dedupPath:    dedupPath,
		sourceDir:    sourceDir,
		lastEntryIdx: -1, // nothing cached yet
	}
	return rdr, nil
}
// SetESReader sets the ES reader for ES-based sources.
// Only needed for v1 dedup files that use ES offsets; must be called before
// the first ReadAt if such a reader is required.
func (r *Reader) SetESReader(esReader ESReader) {
	r.esReader = esReader
}
// LoadSourceFiles memory-maps every source file referenced by the dedup file.
// Each relative path is confined to sourceDir to prevent traversal outside
// it. On any failure, files opened so far are closed before returning.
func (r *Reader) LoadSourceFiles() error {
	r.sourceFiles = make([]mmap.SourceFile, len(r.file.SourceFiles))
	// closeOpened releases the first n source files on the error paths.
	closeOpened := func(n int) {
		for j := 0; j < n; j++ {
			if r.sourceFiles[j] != nil {
				r.sourceFiles[j].Close()
			}
		}
	}
	for i, sf := range r.file.SourceFiles {
		path, err := security.CheckPathConfinement(r.sourceDir, sf.RelativePath)
		if err != nil {
			closeOpened(i)
			return fmt.Errorf("source file %s: %w", sf.RelativePath, err)
		}
		m, err := mmap.Open(path)
		if err != nil {
			closeOpened(i)
			return fmt.Errorf("mmap source file %s: %w", sf.RelativePath, err)
		}
		// Hint sequential access so the kernel does aggressive readahead
		// instead of handling individual 4KB page faults.
		m.Advise(unix.MADV_SEQUENTIAL)
		r.sourceFiles[i] = m
	}
	return nil
}
// LoadSourceFilesPread opens all source files using pread(2) instead of mmap.
// This is used for source files on network filesystems where mmap is unsafe.
// Paths are confined to sourceDir; on any failure, files opened so far are
// closed before returning.
func (r *Reader) LoadSourceFilesPread(timeout time.Duration) error {
	r.sourceFiles = make([]mmap.SourceFile, len(r.file.SourceFiles))
	// closeOpened releases the first n source files on the error paths.
	closeOpened := func(n int) {
		for j := 0; j < n; j++ {
			if r.sourceFiles[j] != nil {
				r.sourceFiles[j].Close()
			}
		}
	}
	for i, sf := range r.file.SourceFiles {
		path, err := security.CheckPathConfinement(r.sourceDir, sf.RelativePath)
		if err != nil {
			closeOpened(i)
			return fmt.Errorf("source file %s: %w", sf.RelativePath, err)
		}
		pf, err := mmap.OpenPread(path, timeout)
		if err != nil {
			closeOpened(i)
			return fmt.Errorf("open source file %s: %w", sf.RelativePath, err)
		}
		r.sourceFiles[i] = pf
	}
	return nil
}
// Close releases all resources held by the Reader: the dedup file mmap and
// every loaded source file. It is safe to call on a partially initialized
// Reader (nil mmap, nil source file slots). Previously all Close errors were
// silently dropped; they are now collected and combined so callers that care
// (e.g. flushing on unmount) can detect release failures.
func (r *Reader) Close() error {
	var errs []error
	if r.dedupMmap != nil {
		if err := r.dedupMmap.Close(); err != nil {
			errs = append(errs, fmt.Errorf("close dedup mmap: %w", err))
		}
	}
	for _, sf := range r.sourceFiles {
		if sf == nil {
			continue
		}
		if err := sf.Close(); err != nil {
			errs = append(errs, fmt.Errorf("close source file: %w", err))
		}
	}
	// errors.Join returns nil when errs is empty, preserving the old
	// success-path behavior.
	return errors.Join(errs...)
}
// initEntryAccess lazily sets up direct mmap access to the entry index (no
// parsing into []Entry). It runs at most once via sync.Once; later calls
// return the stored result. Also builds the block index and, for V4/V6/V8
// files, parses the range map section.
func (r *Reader) initEntryAccess() error {
	r.entriesOnce.Do(func() {
		// Entries begin right after the header and the source file table.
		r.indexStart = r.file.headerSize + r.calculateSourceFilesSize()
		r.entryCount = int(r.file.Header.EntryCount)
		// Every entry must fit inside the mmap'd file.
		need := r.indexStart + int64(r.entryCount)*EntrySize
		if int64(r.dedupMmap.Size()) < need {
			r.entriesErr = fmt.Errorf("mmap too small: need %d, have %d",
				need, r.dedupMmap.Size())
			return
		}
		// Block index accelerates random-access entry lookup.
		r.buildBlockIndex()
		// V4/V6/V8: parse range map section.
		if !r.hasRangeMaps() {
			return
		}
		if err := r.initRangeMaps(); err != nil {
			r.entriesErr = fmt.Errorf("init range maps: %w", err)
		}
	})
	return r.entriesErr
}
// initRangeMaps locates and parses the range map section, which sits between
// the delta section and the footer, and populates rangeMapsByFile keyed by
// source file index.
func (r *Reader) initRangeMaps() error {
	start := r.file.DeltaOffset + r.file.Header.DeltaSize
	total := r.dedupMmap.Size()
	size := int(total) - FooterV4Size - int(start)
	if size <= 0 {
		return fmt.Errorf("no range map section found (offset %d, file size %d)", start, total)
	}
	raw := r.dedupMmap.Slice(start, size)
	if raw == nil {
		return fmt.Errorf("range map slice out of bounds")
	}
	parsed, err := readRangeMapSection(raw)
	if err != nil {
		return fmt.Errorf("parse range map section: %w", err)
	}
	r.rangeMapsByFile = make(map[int]*SourceRangeMaps, len(parsed))
	for i := range parsed {
		// Index by position so we take the address of the slice element.
		r.rangeMapsByFile[int(parsed[i].FileIndex)] = &parsed[i]
	}
	return nil
}
// hasRangeMaps reports whether the header version indicates a range-map based
// layout (V4/V6/V8).
func (r *Reader) hasRangeMaps() bool {
	v := r.file.Header.Version
	return v == VersionRangeMap || v == VersionRangeMapCreator || v == VersionRangeMapUsed
}
// HasRangeMaps returns true if this dedup file uses V4/V6/V8 range maps.
// This checks the header version (available immediately after NewReaderLazy)
// rather than the lazily-loaded range map data, so it's safe to call
// before the first ReadAt.
func (r *Reader) HasRangeMaps() bool {
	return r.hasRangeMaps()
}
// HasSourceUsedFlags returns true if the dedup file carries per-source-file
// Used flags (V7+ layouts).
func (r *Reader) HasSourceUsedFlags() bool {
	v := r.file.Header.Version
	return v == VersionUsed || v == VersionRangeMapUsed
}
// buildBlockIndex precomputes, for each fixed-size block of MKV offsets, the
// index of the last entry whose MkvOffset is <= the block's start offset.
// A lookup can then binary-search only one block's entry span instead of the
// whole index.
//
// Algorithm: single pass over all entries, filling block slots as we go.
// Time: O(entryCount + blockCount), Space: O(blockCount).
func (r *Reader) buildBlockIndex() {
	total := r.file.Header.OriginalSize
	if total <= 0 || r.entryCount == 0 {
		return
	}
	numBlocks := int((total + blockSize - 1) / blockSize)
	index := make([]int, numBlocks)
	e := 0
	for b := 0; b < numBlocks; b++ {
		start := int64(b) * blockSize
		// Walk e forward to the last entry starting at or before this block.
		// For block 0 (start=0) it stays at 0, since no entry precedes it.
		for e+1 < r.entryCount {
			next, ok := r.getMkvOffset(e + 1)
			if !ok || next > start {
				break
			}
			e++
		}
		index[b] = e
	}
	r.blockIndex = index
}
// getEntry returns the entry at the given index by parsing from mmap.
// Uses cache for O(1) sequential access. Safe for concurrent use.
func (r *Reader) getEntry(idx int) (Entry, bool) {
	if idx < 0 || idx >= r.entryCount {
		return Entry{}, false
	}
	// Check cache first (with lock)
	r.cacheMu.Lock()
	if r.lastEntryValid && r.lastEntryIdx == idx {
		// Copy out under the lock before releasing it.
		entry := r.lastEntry
		r.cacheMu.Unlock()
		return entry, true
	}
	r.cacheMu.Unlock()
	// Parse entry from mmap using RawEntry (no lock needed - mmap is read-only)
	offset := r.indexStart + int64(idx)*EntrySize
	data := r.dedupMmap.Slice(offset, EntrySize)
	if len(data) < EntrySize {
		return Entry{}, false
	}
	// Parse using RawEntry for portable unaligned access
	// Layout: MkvOffset(8) + Length(8) + Source(2) + SourceOffset(8) + ESFlags(1) + AudioSubStreamID(1) = 28
	var raw RawEntry
	copy(raw.MkvOffset[:], data[0:8])
	copy(raw.Length[:], data[8:16])
	copy(raw.Source[:], data[16:18])
	copy(raw.SourceOffset[:], data[18:26])
	raw.ESFlags = data[26]
	raw.AudioSubStreamID = data[27]
	entry := raw.ToEntry()
	// Update cache (with lock). Concurrent callers may race to set the cache;
	// either result is a valid (idx, entry) pair, so last writer wins safely.
	r.cacheMu.Lock()
	r.lastEntryIdx = idx
	r.lastEntry = entry
	r.lastEntryValid = true
	r.cacheMu.Unlock()
	return entry, true
}
// getMkvOffset reads only the MkvOffset field (first 8 bytes) of the entry at
// idx, avoiding a full entry parse during binary search.
func (r *Reader) getMkvOffset(idx int) (int64, bool) {
	if idx < 0 || idx >= r.entryCount {
		return 0, false
	}
	raw := r.dedupMmap.Slice(r.indexStart+int64(idx)*EntrySize, 8)
	if len(raw) < 8 {
		return 0, false
	}
	return int64(binary.LittleEndian.Uint64(raw)), true
}
// getEntryLength reads only the Length field of the entry at idx (8 bytes at
// offset 8, right after MkvOffset), avoiding a full entry parse during
// binary search.
func (r *Reader) getEntryLength(idx int) (int64, bool) {
	if idx < 0 || idx >= r.entryCount {
		return 0, false
	}
	raw := r.dedupMmap.Slice(r.indexStart+int64(idx)*EntrySize+8, 8)
	if len(raw) < 8 {
		return 0, false
	}
	return int64(binary.LittleEndian.Uint64(raw)), true
}
// OriginalSize returns the size in bytes of the original MKV file, as
// recorded in the dedup file header.
func (r *Reader) OriginalSize() int64 {
	return r.file.Header.OriginalSize
}
// OriginalChecksum returns the checksum of the original MKV file, as recorded
// in the dedup file header.
func (r *Reader) OriginalChecksum() uint64 {
	return r.file.Header.OriginalChecksum
}
// SourceFiles returns the list of source files recorded in the dedup file.
// The returned slice is the Reader's internal copy; callers must not mutate it.
func (r *Reader) SourceFiles() []SourceFile {
	return r.file.SourceFiles
}
// EntryCount returns the number of index entries.
// Returns 0 if entry access initialization failed. Use InitEntryAccess() to check for errors.
func (r *Reader) EntryCount() int {
	r.initEntryAccess() // Ensure entryCount is initialized; error intentionally ignored here
	return r.entryCount
}
// GetEntry returns the entry at the given index, or false if the index is out
// of range or entry access initialization failed.
func (r *Reader) GetEntry(idx int) (Entry, bool) {
	if r.initEntryAccess() != nil {
		return Entry{}, false
	}
	return r.getEntry(idx)
}
// InitEntryAccess explicitly initializes entry access and returns any error.
// This is useful when you need to check for initialization errors before calling
// methods like EntryCount() or Info() that silently return zero/empty on error.
func (r *Reader) InitEntryAccess() error {
	return r.initEntryAccess()
}
// UsesESOffsets returns true if this dedup file uses ES (elementary stream)
// offsets rather than raw file offsets (v1 format only).
func (r *Reader) UsesESOffsets() bool {
	return r.file.UsesESOffsets
}
// ReadAt reads reconstructed MKV data at the given offset.
//
// It walks the entries overlapping [offset, offset+len(buf)), reading each
// entry's overlap from the appropriate backing store (delta section, range
// map, ES reader, or raw source). LPCM entries get an extra byte-swap pass
// because the source stores big-endian PCM. Returns io.EOF when offset is at
// or past the original file size, or when nothing could be read.
func (r *Reader) ReadAt(buf []byte, offset int64) (int, error) {
	// Initialize entry access on first read (lazy initialization)
	if err := r.initEntryAccess(); err != nil {
		return 0, fmt.Errorf("init entry access: %w", err)
	}
	if offset >= r.file.Header.OriginalSize {
		return 0, io.EOF
	}
	totalRead := 0
	remaining := len(buf)
	originalOffset := offset // Preserve original offset for buffer position calculation
	// Limit read to file size
	if offset+int64(remaining) > r.file.Header.OriginalSize {
		remaining = int(r.file.Header.OriginalSize - offset)
	}
	endOffset := offset + int64(remaining)
	// Find starting entry index (zero-allocation inline lookup)
	startIdx := r.findStartEntry(offset)
	// Iterate entries directly — no []Entry allocation
	for i := startIdx; i < r.entryCount && remaining > 0; i++ {
		entry, ok := r.getEntry(i)
		if !ok || entry.MkvOffset >= endOffset {
			break
		}
		// Calculate overlap between the requested range and this entry.
		entryEnd := entry.MkvOffset + entry.Length
		readStart := offset
		if readStart < entry.MkvOffset {
			readStart = entry.MkvOffset
		}
		readEnd := offset + int64(remaining)
		if readEnd > entryEnd {
			readEnd = entryEnd
		}
		readLen := int(readEnd - readStart)
		if readLen <= 0 {
			continue
		}
		// Calculate offset within entry
		offsetInEntry := readStart - entry.MkvOffset
		sourceOffset := entry.SourceOffset + offsetInEntry
		// Calculate buffer position
		bufOffset := int(readStart - originalOffset)
		// Check if this is an LPCM entry needing byte-swap.
		// For LPCM entries, the source data is raw big-endian PCM; we must
		// byte-swap 16-bit pairs aligned to the entry start. Both the start
		// offset and read length may be misaligned to pair boundaries when
		// the caller's buffer doesn't align with entry boundaries.
		needsLPCMSwap := entry.Source != 0 && entry.IsLPCM && !(r.file.UsesESOffsets && r.esReader != nil)
		if needsLPCMSwap {
			// Compute pair-aligned read range within the entry:
			// widen the start down to an even offset within the entry...
			alignedOff := offsetInEntry
			trimFront := 0
			if alignedOff%2 == 1 {
				alignedOff--
				trimFront = 1
			}
			// ...and widen the length up to an even count when the entry has
			// another byte available to complete the final pair.
			alignedLen := readLen + trimFront
			entryRemaining := int(entry.Length - alignedOff)
			if alignedLen%2 == 1 && alignedLen < entryRemaining {
				alignedLen++
			}
			alignedSrcOff := entry.SourceOffset + alignedOff
			tmp := make([]byte, alignedLen)
			if err := r.lpcmAlignedRead(entry, alignedSrcOff, tmp); err != nil {
				return totalRead, fmt.Errorf("read at offset %d: %w", readStart, err)
			}
			// Swap 16-bit big-endian pairs in place, then copy just the
			// requested window (dropping the alignment padding).
			source.TransformLPCM16BE(tmp)
			copy(buf[bufOffset:bufOffset+readLen], tmp[trimFront:trimFront+readLen])
		} else {
			// Normal read path (non-LPCM)
			if err := r.readEntry(entry, sourceOffset, readLen, buf[bufOffset:bufOffset+readLen]); err != nil {
				return totalRead, fmt.Errorf("read at offset %d: %w", readStart, err)
			}
		}
		totalRead += readLen
		remaining -= readLen
		offset = readEnd
	}
	if totalRead == 0 && len(buf) > 0 {
		return 0, io.EOF
	}
	return totalRead, nil
}
// findStartEntry returns the index of the first entry whose range covers offset.
// Uses the entry cache for O(1) sequential access, block index for O(1) narrowing,
// then bounded binary search. Zero allocations.
func (r *Reader) findStartEntry(offset int64) int {
	// Fast path: check if offset is within cached entry
	r.cacheMu.Lock()
	if r.lastEntryValid && r.lastEntryIdx >= 0 && r.lastEntryIdx < r.entryCount {
		if offset >= r.lastEntry.MkvOffset && offset < r.lastEntry.MkvOffset+r.lastEntry.Length {
			idx := r.lastEntryIdx
			r.cacheMu.Unlock()
			return idx
		}
	}
	r.cacheMu.Unlock()
	// Use block index to narrow binary search range
	var lo, hi int
	if r.blockIndex != nil {
		blockNum := int(offset / blockSize)
		if blockNum >= len(r.blockIndex) {
			blockNum = len(r.blockIndex) - 1
		}
		lo = r.blockIndex[blockNum]
		if blockNum+1 < len(r.blockIndex) {
			// Include one entry past the next block's start to cover entries
			// that span a block boundary.
			hi = r.blockIndex[blockNum+1] + 1
			if hi > r.entryCount {
				hi = r.entryCount
			}
		} else {
			hi = r.entryCount
		}
	} else {
		// No block index (e.g. empty file): search the whole entry range.
		lo = 0
		hi = r.entryCount
	}
	// Binary search within [lo, hi) for first entry whose range covers offset.
	// An unreadable entry is treated as "past offset" so the search fails
	// safe toward a smaller index.
	return lo + sort.Search(hi-lo, func(i int) bool {
		mkvOffset, ok := r.getMkvOffset(lo + i)
		if !ok {
			return true
		}
		entryLen, ok := r.getEntryLength(lo + i)
		if !ok {
			return true
		}
		return mkvOffset+entryLen > offset
	})
}
// findEntriesForRange returns all entries overlapping [offset, offset+length),
// in index order.
//
// Locating the first entry is delegated to findStartEntry, which already
// implements the cache fast path, block-index narrowing, and bounded binary
// search; the previous implementation duplicated all of that logic inline.
// This function now only collects entries from that starting index until one
// begins at or past the end of the requested range.
func (r *Reader) findEntriesForRange(offset, length int64) []Entry {
	if r.entryCount == 0 {
		return nil
	}
	endOffset := offset + length
	var result []Entry
	for i := r.findStartEntry(offset); i < r.entryCount; i++ {
		entry, ok := r.getEntry(i)
		if !ok || entry.MkvOffset >= endOffset {
			break
		}
		result = append(result, entry)
	}
	return result
}
// readEntry reads data for one entry into dest, dispatching on the storage
// format: delta section (Source == 0), V4+ range map, V1 ES reader, or V3
// raw source file.
func (r *Reader) readEntry(entry Entry, sourceOffset int64, readLen int, dest []byte) error {
	switch {
	case entry.Source == 0:
		// Delta bytes live inside the dedup file itself (zero-copy slice).
		data, err := r.readDelta(sourceOffset, readLen)
		if err != nil {
			return err
		}
		copy(dest, data)
		return nil
	case r.rangeMapsByFile != nil:
		// V4: range map translates ES offsets to raw file offsets and reads
		// straight into dest (no allocation).
		return r.readViaRangeMapInto(int(entry.Source-1), entry, sourceOffset, dest)
	case r.file.UsesESOffsets && r.esReader != nil:
		// V1: delegate to the external ES reader.
		var data []byte
		var err error
		if entry.IsVideo {
			data, err = r.esReader.ReadESData(sourceOffset, readLen, true)
		} else {
			data, err = r.esReader.ReadAudioSubStreamData(entry.AudioSubStreamID, sourceOffset, readLen)
		}
		if err != nil {
			return err
		}
		copy(dest, data)
		return nil
	default:
		// V3: raw source file offsets.
		return r.readSourceInto(int(entry.Source-1), sourceOffset, dest)
	}
}
// lpcmAlignedRead reads LPCM source bytes at an already pair-aligned source
// offset. It exists for the odd-offset case where the caller must read from
// one byte before the requested offset; it dispatches between the V4 range
// map path and the V3 raw source path.
func (r *Reader) lpcmAlignedRead(entry Entry, alignedSrcOff int64, dest []byte) error {
	fileIndex := int(entry.Source - 1)
	if r.rangeMapsByFile == nil {
		// V3: read from raw source file.
		return r.readSourceInto(fileIndex, alignedSrcOff, dest)
	}
	return r.readViaRangeMapInto(fileIndex, entry, alignedSrcOff, dest)
}
// readDelta returns a zero-copy mmap slice of the delta section at the given
// delta-relative offset.
func (r *Reader) readDelta(offset int64, size int) ([]byte, error) {
	chunk := r.dedupMmap.Slice(r.file.DeltaOffset+offset, size)
	if chunk == nil {
		return nil, fmt.Errorf("delta offset out of range")
	}
	return chunk, nil
}
// readViaRangeMapInto resolves the per-stream range map for the entry and
// reads the data at sourceOffset (an ES offset) directly into dest, avoiding
// allocation.
func (r *Reader) readViaRangeMapInto(fileIndex int, entry Entry, sourceOffset int64, dest []byte) error {
	maps, ok := r.rangeMapsByFile[fileIndex]
	if !ok {
		return fmt.Errorf("no range map for source file %d", fileIndex)
	}
	if fileIndex < 0 || fileIndex >= len(r.sourceFiles) || r.sourceFiles[fileIndex] == nil {
		return fmt.Errorf("source file %d not loaded for range map read", fileIndex)
	}
	sf := r.sourceFiles[fileIndex]
	if entry.IsVideo {
		if maps.VideoMap == nil {
			return fmt.Errorf("no video range map for source file %d", fileIndex)
		}
		_, err := maps.VideoMap.ReadDataInto(sf, sourceOffset, dest)
		return err
	}
	am, ok := maps.AudioMaps[entry.AudioSubStreamID]
	if !ok {
		return fmt.Errorf("no audio sub-stream %d range map for source file %d", entry.AudioSubStreamID, fileIndex)
	}
	_, err := am.ReadDataInto(sf, sourceOffset, dest)
	return err
}
// readSourceInto fills dest from the source file at fileIndex starting at
// offset. A short read is reported as io.ErrUnexpectedEOF so callers (FUSE)
// surface EIO instead of silently truncating the data.
func (r *Reader) readSourceInto(fileIndex int, offset int64, dest []byte) error {
	if fileIndex < 0 || fileIndex >= len(r.sourceFiles) {
		return fmt.Errorf("invalid file index: %d", fileIndex)
	}
	sf := r.sourceFiles[fileIndex]
	if sf == nil {
		return fmt.Errorf("source file %d not loaded", fileIndex)
	}
	n, err := sf.ReadAt(dest, offset)
	if n == len(dest) {
		// EOF exactly at the end of the requested range is still a full read.
		if err == io.EOF {
			return nil
		}
		return err
	}
	if err == nil || err == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return err
}
// parseHeaderOnly parses just the header and source files (not entries) for fast initialization.
func parseHeaderOnly(r io.Reader) (*File, error) {
file := &File{}
// Read and verify magic
magic := make([]byte, MagicSize)
if _, err := io.ReadFull(r, magic); err != nil {
return nil, fmt.Errorf("read magic: %w", err)
}
if string(magic) != Magic {
return nil, fmt.Errorf("invalid magic: %s", magic)
}
copy(file.Header.Magic[:], magic)
// Read version
if err := binary.Read(r, binary.LittleEndian, &file.Header.Version); err != nil {
return nil, fmt.Errorf("read version: %w", err)
}
// Support versions 3-8. Older versions must be recreated.
switch file.Header.Version {
case Version, VersionRangeMap, VersionCreator, VersionRangeMapCreator,
VersionUsed, VersionRangeMapUsed:
// OK
case 1:
return nil, fmt.Errorf("unsupported version 1 (uses ES offsets); please recreate with 'mkvdup create'")
case 2:
return nil, fmt.Errorf("unsupported version 2 (uses uint8 source index); please recreate with 'mkvdup create'")
default:
return nil, fmt.Errorf("unsupported version: %d (expected 3-8)", file.Header.Version)
}
// Read flags
if err := binary.Read(r, binary.LittleEndian, &file.Header.Flags); err != nil {
return nil, fmt.Errorf("read flags: %w", err)
}
// Read original size
if err := binary.Read(r, binary.LittleEndian, &file.Header.OriginalSize); err != nil {
return nil, fmt.Errorf("read original size: %w", err)
}
// Read original checksum
if err := binary.Read(r, binary.LittleEndian, &file.Header.OriginalChecksum); err != nil {
return nil, fmt.Errorf("read original checksum: %w", err)
}
// Read source type
if err := binary.Read(r, binary.LittleEndian, &file.Header.SourceType); err != nil {
return nil, fmt.Errorf("read source type: %w", err)
}
// Read uses ES offsets flag
if err := binary.Read(r, binary.LittleEndian, &file.Header.UsesESOffsets); err != nil {
return nil, fmt.Errorf("read uses ES offsets: %w", err)
}
file.UsesESOffsets = file.Header.UsesESOffsets == 1
// Read source file count
if err := binary.Read(r, binary.LittleEndian, &file.Header.SourceFileCount); err != nil {
return nil, fmt.Errorf("read source file count: %w", err)
}
// Read entry count
if err := binary.Read(r, binary.LittleEndian, &file.Header.EntryCount); err != nil {
return nil, fmt.Errorf("read entry count: %w", err)
}
// Read delta offset
if err := binary.Read(r, binary.LittleEndian, &file.Header.DeltaOffset); err != nil {
return nil, fmt.Errorf("read delta offset: %w", err)
}
file.DeltaOffset = file.Header.DeltaOffset
// Read delta size
if err := binary.Read(r, binary.LittleEndian, &file.Header.DeltaSize); err != nil {
return nil, fmt.Errorf("read delta size: %w", err)
}
// Read creator version string (V5/V6 only)
file.headerSize = int64(HeaderSize)
if file.Header.Version >= VersionCreator {
var versionLen uint16
if err := binary.Read(r, binary.LittleEndian, &versionLen); err != nil {
return nil, fmt.Errorf("read creator version length: %w", err)
}
if versionLen > MaxCreatorVersionLen {
return nil, fmt.Errorf("creator version length %d exceeds maximum (%d)", versionLen, MaxCreatorVersionLen)
}
if versionLen > 0 {
versionBytes := make([]byte, versionLen)
if _, err := io.ReadFull(r, versionBytes); err != nil {
return nil, fmt.Errorf("read creator version: %w", err)
}
file.CreatorVersion = string(versionBytes)
}
file.headerSize = int64(HeaderSize) + 2 + int64(versionLen)
}
// Read source files
file.SourceFiles = make([]SourceFile, file.Header.SourceFileCount)
for i := range file.SourceFiles {
var pathLen uint16
if err := binary.Read(r, binary.LittleEndian, &pathLen); err != nil {
return nil, fmt.Errorf("read path length: %w", err)
}
path := make([]byte, pathLen)
if _, err := io.ReadFull(r, path); err != nil {
return nil, fmt.Errorf("read path: %w", err)
}
file.SourceFiles[i].RelativePath = string(path)
if err := binary.Read(r, binary.LittleEndian, &file.SourceFiles[i].Size); err != nil {
return nil, fmt.Errorf("read file size: %w", err)
}
if err := binary.Read(r, binary.LittleEndian, &file.SourceFiles[i].Checksum); err != nil {
return nil, fmt.Errorf("read file checksum: %w", err)
}
// V7/V8: read used flag
if file.Header.Version == VersionUsed || file.Header.Version == VersionRangeMapUsed {
var used uint8
if err := binary.Read(r, binary.LittleEndian, &used); err != nil {
return nil, fmt.Errorf("read file used flag: %w", err)
}
file.SourceFiles[i].Used = used == 1
}
}
// Entries are accessed directly from mmap via Reader.getEntry()
return file, nil
}
// VerifyIntegrity verifies the dedup file checksums.
//
// It reads the footer from the mmap'd dedup file, recomputes the xxhash64
// of the index section, the delta section, and (for formats with range
// maps) the range map section, and compares each against the checksum
// stored in the footer. All section reads are zero-copy slices into the
// mmap. Returns a descriptive error on the first mismatch or any
// out-of-range slice.
func (r *Reader) VerifyIntegrity() error {
	// Initialize entry access to get entryCount (needed to size the index section).
	if err := r.initEntryAccess(); err != nil {
		return fmt.Errorf("init entry access: %w", err)
	}
	// Footer is larger when a range map checksum is present.
	footerSz := int64(FooterSize)
	if r.hasRangeMaps() {
		footerSz = int64(FooterV4Size)
	}
	fileSize := r.dedupMmap.Size()
	// Read footer from mmap; it sits at the very end of the file.
	footerOffset := fileSize - footerSz
	footerData := r.dedupMmap.Slice(footerOffset, int(footerSz))
	if footerData == nil {
		return fmt.Errorf("footer slice out of range")
	}
	var footer Footer
	off := 0
	footer.IndexChecksum = binary.LittleEndian.Uint64(footerData[off : off+8])
	off += 8
	footer.DeltaChecksum = binary.LittleEndian.Uint64(footerData[off : off+8])
	off += 8
	if r.hasRangeMaps() {
		footer.RangeMapChecksum = binary.LittleEndian.Uint64(footerData[off : off+8])
		off += 8
	}
	// The footer ends with the same magic bytes as the header.
	if string(footerData[off:off+MagicSize]) != Magic {
		return fmt.Errorf("invalid footer magic")
	}
	// Calculate and verify index checksum (zero-copy).
	indexSize := int(int64(r.entryCount) * EntrySize)
	indexData := r.dedupMmap.Slice(r.indexStart, indexSize)
	if indexData == nil {
		return fmt.Errorf("read index for checksum: slice out of range")
	}
	if xxhash.Sum64(indexData) != footer.IndexChecksum {
		return fmt.Errorf("index checksum mismatch")
	}
	// Calculate and verify delta checksum (zero-copy).
	deltaData := r.dedupMmap.Slice(r.file.DeltaOffset, int(r.file.Header.DeltaSize))
	if deltaData == nil {
		return fmt.Errorf("read delta for checksum: slice out of range")
	}
	if xxhash.Sum64(deltaData) != footer.DeltaChecksum {
		return fmt.Errorf("delta checksum mismatch")
	}
	// Formats with range maps: verify the range map section, which sits
	// between the delta data and the footer.
	if r.hasRangeMaps() {
		rangeMapOffset := r.file.DeltaOffset + r.file.Header.DeltaSize
		rangeMapSize := int(footerOffset - rangeMapOffset)
		if rangeMapSize > 0 {
			rangeMapData := r.dedupMmap.Slice(rangeMapOffset, rangeMapSize)
			if rangeMapData == nil {
				return fmt.Errorf("read range map for checksum: slice out of range")
			}
			if xxhash.Sum64(rangeMapData) != footer.RangeMapChecksum {
				return fmt.Errorf("range map checksum mismatch")
			}
		}
	}
	return nil
}
// calculateSourceFilesSize returns the encoded byte length of the source
// files section: per file, a 2-byte path length, the path bytes, an 8-byte
// size, an 8-byte checksum, and — for formats with used flags — 1 flag byte.
func (r *Reader) calculateSourceFilesSize() int64 {
	perFileFixed := int64(2 + 8 + 8)
	if r.HasSourceUsedFlags() {
		perFileFixed++
	}
	total := perFileFixed * int64(len(r.file.SourceFiles))
	for _, sf := range r.file.SourceFiles {
		total += int64(len(sf.RelativePath))
	}
	return total
}
// Info returns a summary of the dedup file as a string-keyed map.
// Entry access is initialized first so "entry_count" is populated; if that
// initialization fails, the "error" key holds the failure message and
// "entry_count" reports 0.
func (r *Reader) Info() map[string]any {
	initErr := r.initEntryAccess() // ensures r.entryCount is set
	summary := map[string]any{
		"version":           r.file.Header.Version,
		"original_size":     r.file.Header.OriginalSize,
		"original_checksum": r.file.Header.OriginalChecksum,
		"source_type":       r.file.Header.SourceType,
		"uses_es_offsets":   r.file.UsesESOffsets,
		"has_range_maps":    r.rangeMapsByFile != nil,
		"source_file_count": len(r.file.SourceFiles),
		"entry_count":       r.entryCount,
		"delta_size":        r.file.Header.DeltaSize,
		"creator_version":   r.file.CreatorVersion,
	}
	if initErr != nil {
		summary["error"] = initErr.Error()
	}
	return summary
}
package dedup
// stridedCopy gathers count fixed-size payload blocks from src into a
// contiguous dst. Consecutive blocks in src start stride bytes apart
// (stride >= payloadSize); dst receives them back to back. Keeping the
// copies in one tight loop avoids per-block call overhead when extracting
// many small payloads (e.g. 184-byte M2TS PES payloads at 192-byte stride).
func stridedCopy(dst, src []byte, count, payloadSize, stride int) {
	for i := 0; i < count; i++ {
		block := src[i*stride : i*stride+payloadSize]
		copy(dst[i*payloadSize:(i+1)*payloadSize], block)
	}
}
package dedup
import (
"bufio"
"encoding/binary"
"fmt"
"io"
"os"
"github.com/cespare/xxhash/v2"
"github.com/stuckj/mkvdup/internal/matcher"
"github.com/stuckj/mkvdup/internal/source"
)
// Writer creates .mkvdup files.
//
// Typical usage: SetHeader, SetSourceFiles, optionally SetRangeMaps and/or
// SetCreatorVersion, SetMatchResult, then Write (or WriteWithProgress),
// and finally Close.
type Writer struct {
	file           *os.File             // output .mkvdup file
	header         Header               // header fields; version finalized in resolveVersion()
	sourceFiles    []SourceFile         // source file table written after the header
	entries        []Entry              // index entries mapping MKV ranges to source/delta data
	deltaData      []byte               // In-memory delta (for tests / small files)
	deltaFile      *matcher.DeltaWriter // File-backed delta (for large files)
	rangeMaps      []RangeMapData       // V4/V6: per-source-file range maps (nil for V3/V5)
	rangeMapBuf    []byte               // Pre-encoded range map section (set by EncodeRangeMaps)
	creatorVersion string               // Version string to embed in the file
}
// NewWriter creates a new dedup file writer that writes to path.
// The caller is responsible for calling Close when finished.
func NewWriter(path string) (*Writer, error) {
	out, err := os.Create(path)
	if err != nil {
		return nil, fmt.Errorf("create file: %w", err)
	}
	w := &Writer{file: out}
	return w, nil
}
// SetCreatorVersion sets the version string to embed in the file,
// truncating it to MaxCreatorVersionLen bytes if necessary.
// When set, the writer produces V7 (or V8 if range maps are also set).
func (w *Writer) SetCreatorVersion(v string) {
	if len(v) <= MaxCreatorVersionLen {
		w.creatorVersion = v
		return
	}
	w.creatorVersion = v[:MaxCreatorVersionLen]
}
// SetHeader records the original file's size, checksum, and source type in
// the header and stamps the magic bytes. The version defaults to V3 and is
// upgraded to V7/V8 later by resolveVersion().
func (w *Writer) SetHeader(originalSize int64, originalChecksum uint64, sourceType source.Type) {
	copy(w.header.Magic[:], Magic)
	w.header.Version = Version // Default V3; upgraded to V7/V8 in resolveVersion()
	w.header.Flags = 0
	w.header.OriginalSize = originalSize
	w.header.OriginalChecksum = originalChecksum
	w.header.UsesESOffsets = 0 // v2 always uses raw offsets
	if sourceType == source.TypeDVD {
		w.header.SourceType = SourceTypeDVD
	} else if sourceType == source.TypeBluray {
		w.header.SourceType = SourceTypeBluray
	}
}
// SetSourceFiles records the source file list and its count in the header.
func (w *Writer) SetSourceFiles(files []source.File) {
	converted := make([]SourceFile, 0, len(files))
	for _, sf := range files {
		converted = append(converted, ToSourceFile(sf))
	}
	w.sourceFiles = converted
	w.header.SourceFileCount = uint16(len(files))
}
// SetRangeMaps sets the range map data for the V4 format family.
// When range maps are set, ES-offset entries are preserved (not converted
// to raw offsets) and a range map section is written to the dedup file for
// mapping ES offsets to raw file positions at read time.
func (w *Writer) SetRangeMaps(rangeMaps []RangeMapData) {
	w.header.UsesESOffsets = 1
	w.header.Version = VersionRangeMap // Default V4; upgraded to V8 in resolveVersion()
	w.rangeMaps = rangeMaps
}
// resolveVersion sets the final file version based on configured features:
// range maps select the range-map family, and a non-empty creator version
// upgrades to the "used flags" formats (V7/V8).
func (w *Writer) resolveVersion() {
	hasCreator := w.creatorVersion != ""
	switch {
	case w.rangeMaps != nil && hasCreator:
		w.header.Version = VersionRangeMapUsed // V8
	case w.rangeMaps != nil:
		w.header.Version = VersionRangeMap // V4
	case hasCreator:
		w.header.Version = VersionUsed // V7
	default:
		w.header.Version = Version // V3
	}
}
// computeUsedFlags scans entries and marks which source files are
// referenced. Entries with Source == 0 are delta entries and reference no
// source file; Source is otherwise a 1-based index into sourceFiles.
func (w *Writer) computeUsedFlags() {
	referenced := make([]bool, len(w.sourceFiles))
	for _, e := range w.entries {
		if e.Source == 0 {
			continue
		}
		if idx := int(e.Source - 1); idx < len(referenced) {
			referenced[idx] = true
		}
	}
	for i := range w.sourceFiles {
		w.sourceFiles[i].Used = referenced[i]
	}
}
// EncodeRangeMaps pre-encodes the range map section and caches it on the
// writer. Call this before WriteWithProgress to avoid a CPU-intensive
// encoding phase with no progress output. Returns the encoded size.
// If range maps are nil, this is a no-op returning 0.
func (w *Writer) EncodeRangeMaps() (int64, error) {
	if w.rangeMaps == nil {
		return 0, nil
	}
	encoded, err := encodeRangeMapSection(w.rangeMaps)
	if err != nil {
		return 0, fmt.Errorf("encode range maps: %w", err)
	}
	w.rangeMapBuf = encoded
	return int64(len(encoded)), nil
}
// SetMatchResult sets the match result (entries and delta).
// If esConverters is provided and non-empty, ES-offset entries will be converted
// to raw-offset entries, potentially splitting entries that span multiple ranges.
// When range maps are set (V4 family), entries stay in ES-offset form since the
// range maps perform the mapping at read time. The delta may be file-backed
// (large files) or in-memory (tests / small files); DeltaSize is set from
// whichever is present.
func (w *Writer) SetMatchResult(result *matcher.Result, esConverters []source.ESRangeConverter) error {
	// Convert matcher entries to dedup entries
	entries := make([]Entry, len(result.Entries))
	for i, e := range result.Entries {
		entries[i] = FromMatcherEntry(e)
	}
	// Convert ES offsets to raw offsets if we have converters.
	// Skip conversion for V4 (range maps handle the mapping at read time).
	if len(esConverters) > 0 && w.rangeMaps == nil {
		var err error
		entries, err = w.convertESToRawOffsets(entries, esConverters)
		if err != nil {
			return fmt.Errorf("convert ES to raw offsets: %w", err)
		}
	}
	w.entries = entries
	w.header.EntryCount = uint64(len(w.entries))
	if result.DeltaFile != nil {
		// File-backed delta: size comes from the temp file.
		w.deltaFile = result.DeltaFile
		w.header.DeltaSize = result.DeltaFile.Size()
	} else {
		// In-memory delta.
		w.deltaData = result.DeltaData
		w.header.DeltaSize = int64(len(result.DeltaData))
	}
	return nil
}
// convertESToRawOffsets converts ES-offset entries to raw-offset entries.
// Entries that span multiple PES payload ranges are split into multiple
// entries — one per contiguous raw range — with the MKV offset advanced by
// each range's size so the MKV side stays contiguous. Delta entries
// (Source == 0) and entries without an available converter pass through
// unchanged.
func (w *Writer) convertESToRawOffsets(entries []Entry, esConverters []source.ESRangeConverter) ([]Entry, error) {
	// Pre-allocate with ~2x capacity since entries typically expand to multiple raw ranges
	result := make([]Entry, 0, len(entries)*2)
	for _, entry := range entries {
		if entry.Source == 0 {
			// Delta entry - no conversion needed
			result = append(result, entry)
			continue
		}
		// Get the ES converter for this source file (Source is 1-based)
		fileIndex := int(entry.Source - 1)
		if fileIndex >= len(esConverters) || esConverters[fileIndex] == nil {
			// No converter available - assume raw offsets already
			result = append(result, entry)
			continue
		}
		converter := esConverters[fileIndex]
		// Get raw ranges for this ES region. Video and audio use different
		// lookup paths; audio is additionally addressed by sub-stream ID.
		var rawRanges []source.RawRange
		var err error
		if entry.IsVideo {
			rawRanges, err = converter.RawRangesForESRegion(entry.SourceOffset, int(entry.Length), true)
		} else {
			rawRanges, err = converter.RawRangesForAudioSubStream(entry.AudioSubStreamID, entry.SourceOffset, int(entry.Length))
		}
		if err != nil {
			return nil, fmt.Errorf("convert entry at MKV offset %d: %w", entry.MkvOffset, err)
		}
		// Create one entry per raw range
		mkvOffset := entry.MkvOffset
		for _, rr := range rawRanges {
			result = append(result, Entry{
				MkvOffset:        mkvOffset,
				Length:           int64(rr.Size),
				Source:           entry.Source,
				SourceOffset:     rr.FileOffset, // Raw file offset!
				IsVideo:          entry.IsVideo,
				AudioSubStreamID: entry.AudioSubStreamID,
				IsLPCM:           entry.IsLPCM,
			})
			mkvOffset += int64(rr.Size)
		}
	}
	return result, nil
}
// WriteProgressFunc is called to report write progress.
// written is the number of bytes emitted so far; total is the final file size.
type WriteProgressFunc func(written, total int64)
// Write writes the dedup file without progress reporting.
// It is equivalent to WriteWithProgress(nil).
func (w *Writer) Write() error {
	return w.WriteWithProgress(nil)
}
// WriteWithProgress writes the dedup file with progress reporting.
//
// File layout, in order: header (+ creator version string when set), source
// file table, index entries, delta data, optional range map section, footer.
// The progress callback receives bytes written against the precomputed
// total file size.
func (w *Writer) WriteWithProgress(progress WriteProgressFunc) error {
	// Scan entries to compute per-source Used flags, then determine file version.
	w.computeUsedFlags()
	w.resolveVersion()
	// Use pre-encoded range maps if available (from EncodeRangeMaps),
	// otherwise encode now.
	rangeMapBuf := w.rangeMapBuf
	if rangeMapBuf == nil && w.rangeMaps != nil {
		var err error
		rangeMapBuf, err = encodeRangeMapSection(w.rangeMaps)
		if err != nil {
			return fmt.Errorf("encode range maps: %w", err)
		}
	}
	// Calculate offsets and total size. The delta section starts right
	// after header + creator version + source file table + index.
	sourceFilesSize := w.calculateSourceFilesSize()
	cvSize := creatorVersionSize(w.creatorVersion)
	indexSize := int64(len(w.entries)) * EntrySize
	deltaOffset := int64(HeaderSize) + cvSize + sourceFilesSize + indexSize
	w.header.DeltaOffset = deltaOffset
	// Footer is larger when a range map checksum must be stored.
	footerSize := int64(FooterSize)
	if rangeMapBuf != nil {
		footerSize = FooterV4Size
	}
	totalSize := deltaOffset + w.header.DeltaSize + int64(len(rangeMapBuf)) + footerSize
	var written int64
	// Write header (includes the creator version string when set — this
	// writer emits V7/V8 in that case; see resolveVersion).
	if err := w.writeHeader(); err != nil {
		return fmt.Errorf("write header: %w", err)
	}
	written += int64(HeaderSize) + cvSize
	// Write source files section
	if err := w.writeSourceFiles(); err != nil {
		return fmt.Errorf("write source files: %w", err)
	}
	written += sourceFilesSize
	// Write index entries and calculate checksum
	indexChecksum, err := w.writeEntriesWithProgress(progress, &written, totalSize)
	if err != nil {
		return fmt.Errorf("write entries: %w", err)
	}
	// Write delta data and calculate checksum
	deltaChecksum, err := w.writeDeltaWithProgress(progress, &written, totalSize)
	if err != nil {
		return fmt.Errorf("write delta: %w", err)
	}
	// Write range map section (range-map formats only)
	var rangeMapChecksum uint64
	if rangeMapBuf != nil {
		rangeMapChecksum, err = writeRangeMapSection(w.file, rangeMapBuf)
		if err != nil {
			return fmt.Errorf("write range map: %w", err)
		}
		written += int64(len(rangeMapBuf))
		if progress != nil {
			progress(written, totalSize)
		}
	}
	// Write footer (section checksums + trailing magic)
	if err := w.writeFooter(indexChecksum, deltaChecksum, rangeMapChecksum); err != nil {
		return fmt.Errorf("write footer: %w", err)
	}
	if progress != nil {
		progress(totalSize, totalSize)
	}
	return nil
}
// Close closes the underlying output file, if one is open.
func (w *Writer) Close() error {
	if w.file == nil {
		return nil
	}
	return w.file.Close()
}
// calculateSourceFilesSize returns the encoded size of the source files
// section: per file, PathLen (2) + Path (variable) + Size (8) + Checksum (8),
// plus a 1-byte Used flag for V7/V8 files.
func (w *Writer) calculateSourceFilesSize() int64 {
	fixed := int64(2 + 8 + 8)
	if w.header.Version == VersionUsed || w.header.Version == VersionRangeMapUsed {
		fixed++
	}
	size := fixed * int64(len(w.sourceFiles))
	for _, sf := range w.sourceFiles {
		size += int64(len(sf.RelativePath))
	}
	return size
}
// writeHeader emits the fixed header fields in order, followed by the
// optional length-prefixed creator version string. Field order must match
// the reader's header-parsing sequence exactly.
func (w *Writer) writeHeader() error {
	// Write magic
	if _, err := w.file.Write([]byte(Magic)); err != nil {
		return err
	}
	// Write version
	if err := binary.Write(w.file, binary.LittleEndian, w.header.Version); err != nil {
		return err
	}
	// Write flags
	if err := binary.Write(w.file, binary.LittleEndian, w.header.Flags); err != nil {
		return err
	}
	// Write original size
	if err := binary.Write(w.file, binary.LittleEndian, w.header.OriginalSize); err != nil {
		return err
	}
	// Write original checksum
	if err := binary.Write(w.file, binary.LittleEndian, w.header.OriginalChecksum); err != nil {
		return err
	}
	// Write source type
	if err := binary.Write(w.file, binary.LittleEndian, w.header.SourceType); err != nil {
		return err
	}
	// Write uses ES offsets flag
	if err := binary.Write(w.file, binary.LittleEndian, w.header.UsesESOffsets); err != nil {
		return err
	}
	// Write source file count
	if err := binary.Write(w.file, binary.LittleEndian, w.header.SourceFileCount); err != nil {
		return err
	}
	// Write entry count
	if err := binary.Write(w.file, binary.LittleEndian, w.header.EntryCount); err != nil {
		return err
	}
	// Write delta offset
	if err := binary.Write(w.file, binary.LittleEndian, w.header.DeltaOffset); err != nil {
		return err
	}
	// Write delta size
	if err := binary.Write(w.file, binary.LittleEndian, w.header.DeltaSize); err != nil {
		return err
	}
	// Write creator version string when one is set (2-byte length prefix
	// followed by the bytes). resolveVersion() guarantees the file version
	// is V7/V8 in this case, so readers expect the field to be present.
	if w.creatorVersion != "" {
		versionLen := uint16(len(w.creatorVersion))
		if err := binary.Write(w.file, binary.LittleEndian, versionLen); err != nil {
			return err
		}
		if _, err := w.file.Write([]byte(w.creatorVersion)); err != nil {
			return err
		}
	}
	return nil
}
// writeSourceFiles emits the source file table: for each file, a 2-byte
// path length, the path bytes, the 8-byte size, the 8-byte checksum, and —
// for V7/V8 formats — a 1-byte used flag.
func (w *Writer) writeSourceFiles() error {
	withUsed := w.header.Version == VersionUsed || w.header.Version == VersionRangeMapUsed
	for _, sf := range w.sourceFiles {
		if err := binary.Write(w.file, binary.LittleEndian, uint16(len(sf.RelativePath))); err != nil {
			return err
		}
		if _, err := w.file.Write([]byte(sf.RelativePath)); err != nil {
			return err
		}
		if err := binary.Write(w.file, binary.LittleEndian, sf.Size); err != nil {
			return err
		}
		if err := binary.Write(w.file, binary.LittleEndian, sf.Checksum); err != nil {
			return err
		}
		if !withUsed {
			continue
		}
		flag := uint8(0)
		if sf.Used {
			flag = 1
		}
		if err := binary.Write(w.file, binary.LittleEndian, flag); err != nil {
			return err
		}
	}
	return nil
}
// writeEntriesWithProgress serializes all index entries to the file through
// a buffered writer while simultaneously feeding the bytes to an xxhash64
// hasher. Returns the checksum of the serialized index section for the
// footer.
//
// Entry wire layout (little-endian): MkvOffset u64 | Length u64 | Source u16
// | SourceOffset u64 | ES flags u8 (bit 0 IsVideo, bit 1 IsLPCM) |
// AudioSubStreamID u8 — EntrySize bytes total.
func (w *Writer) writeEntriesWithProgress(progress WriteProgressFunc, written *int64, total int64) (uint64, error) {
	hasher := xxhash.New()
	// Use buffered writer to batch syscalls (64KB buffer)
	bufWriter := bufio.NewWriterSize(w.file, 64*1024)
	writer := io.MultiWriter(bufWriter, hasher)
	entryCount := len(w.entries)
	lastProgress := 0
	// Reusable buffer for entry serialization (allocation-free per entry)
	var entryBuf [EntrySize]byte
	for i, entry := range w.entries {
		// Serialize entry to buffer using allocation-free Put* functions
		binary.LittleEndian.PutUint64(entryBuf[0:8], uint64(entry.MkvOffset))
		binary.LittleEndian.PutUint64(entryBuf[8:16], uint64(entry.Length))
		binary.LittleEndian.PutUint16(entryBuf[16:18], entry.Source)
		binary.LittleEndian.PutUint64(entryBuf[18:26], uint64(entry.SourceOffset))
		// ES flags byte: bit 0 = IsVideo, bit 1 = IsLPCM
		var esFlags uint8
		if entry.IsVideo {
			esFlags |= 1
		}
		if entry.IsLPCM {
			esFlags |= 2
		}
		entryBuf[26] = esFlags
		entryBuf[27] = entry.AudioSubStreamID
		// Single write per entry
		if _, err := writer.Write(entryBuf[:]); err != nil {
			return 0, err
		}
		*written += EntrySize
		// Report progress every 1% or 10000 entries
		if progress != nil && entryCount > 0 {
			pct := (i + 1) * 100 / entryCount
			if pct > lastProgress || (i+1)%10000 == 0 {
				progress(*written, total)
				lastProgress = pct
			}
		}
	}
	// Flush buffered writer so all index bytes reach the file before the
	// delta section is written.
	if err := bufWriter.Flush(); err != nil {
		return 0, err
	}
	return hasher.Sum64(), nil
}
// writeDeltaWithProgress streams the delta data to the output file while
// hashing it with xxhash64, reporting progress at most once per percent of
// the total file size. The delta comes either from a temp file (large
// files) or from the in-memory buffer (tests / small files). Returns the
// checksum of the delta bytes for the footer.
func (w *Writer) writeDeltaWithProgress(progress WriteProgressFunc, written *int64, total int64) (uint64, error) {
	hasher := xxhash.New()
	const chunkSize = 64 * 1024 // 64KB chunks
	lastProgress := 0
	if w.deltaFile != nil {
		// Read from temp file and write to output.
		f := w.deltaFile.File()
		// io.SeekStart instead of the magic constant 0 for the whence arg.
		if _, err := f.Seek(0, io.SeekStart); err != nil {
			return 0, fmt.Errorf("seek delta file: %w", err)
		}
		buf := make([]byte, chunkSize)
		for {
			n, err := f.Read(buf)
			if n > 0 {
				chunk := buf[:n]
				if _, werr := w.file.Write(chunk); werr != nil {
					return 0, werr
				}
				// xxhash Digest.Write never fails.
				hasher.Write(chunk)
				*written += int64(n)
				if progress != nil && w.header.DeltaSize > 0 {
					pct := int((*written * 100) / total)
					if pct > lastProgress {
						progress(*written, total)
						lastProgress = pct
					}
				}
			}
			// Check err after consuming n bytes: Read may return data
			// together with io.EOF.
			if err == io.EOF {
				break
			}
			if err != nil {
				return 0, err
			}
		}
	} else {
		// In-memory path (for tests / small files)
		data := w.deltaData
		for len(data) > 0 {
			chunk := data
			if len(chunk) > chunkSize {
				chunk = data[:chunkSize]
			}
			data = data[len(chunk):]
			if _, err := w.file.Write(chunk); err != nil {
				return 0, err
			}
			hasher.Write(chunk)
			*written += int64(len(chunk))
			if progress != nil && w.header.DeltaSize > 0 {
				pct := int((*written * 100) / total)
				if pct > lastProgress {
					progress(*written, total)
					lastProgress = pct
				}
			}
		}
	}
	return hasher.Sum64(), nil
}
// writeFooter emits the footer: index checksum, delta checksum, an optional
// range map checksum (only when range maps are configured), and finally the
// magic bytes.
func (w *Writer) writeFooter(indexChecksum, deltaChecksum, rangeMapChecksum uint64) error {
	checksums := []uint64{indexChecksum, deltaChecksum}
	if w.rangeMaps != nil {
		checksums = append(checksums, rangeMapChecksum)
	}
	for _, sum := range checksums {
		if err := binary.Write(w.file, binary.LittleEndian, sum); err != nil {
			return err
		}
	}
	_, err := w.file.Write([]byte(Magic))
	return err
}
package fuse
import (
"fmt"
"path/filepath"
"time"
"github.com/stuckj/mkvdup/internal/dedup"
"github.com/stuckj/mkvdup/internal/security"
"github.com/stuckj/mkvdup/internal/source"
)
// Compile-time assertions that the adapters implement their interfaces;
// a mismatch fails the build rather than surfacing at runtime.
var _ ReaderInitializer = (*dedupReaderAdapter)(nil)
var _ ReaderFactory = (*DefaultReaderFactory)(nil)
var _ ConfigReader = (*DefaultConfigReader)(nil)
// dedupReaderAdapter wraps dedup.Reader to implement the ReaderInitializer
// interface, adding FUSE-layer concerns (network-FS pread timeout, optional
// source index lifetime).
type dedupReaderAdapter struct {
	reader      *dedup.Reader // underlying dedup file reader
	readTimeout time.Duration // pread timeout for network FS sources
	// index stores the source index for cleanup when using ES offsets.
	// This is nil when using raw source files.
	index *source.Index
}
// OriginalSize returns the original (pre-dedup) file size reported by the
// underlying dedup reader.
func (a *dedupReaderAdapter) OriginalSize() int64 {
	return a.reader.OriginalSize()
}
// UsesESOffsets reports whether the dedup file stores elementary-stream
// offsets rather than raw file offsets.
func (a *dedupReaderAdapter) UsesESOffsets() bool {
	return a.reader.UsesESOffsets()
}
// InitializeForReading prepares source file access for the given directory.
// Three paths:
//   - legacy ES offsets without range maps: build a full source index
//     (guard only — no current format reaches this branch),
//   - network filesystems: open sources with pread+retry rather than mmap,
//   - local filesystems: mmap sources for zero-copy reads.
func (a *dedupReaderAdapter) InitializeForReading(sourceDir string) error {
	if a.reader.UsesESOffsets() && !a.reader.HasRangeMaps() {
		// Legacy guard: ES offsets without range maps would need a full
		// ES reader. No current format hits this path — DVD formats
		// (V3/V5/V7) use raw file offsets, and Blu-ray formats (V4/V6/V8)
		// always have range maps. Kept for safety against future formats.
		indexer, err := source.NewIndexer(sourceDir, source.DefaultWindowSize)
		if err != nil {
			return fmt.Errorf("create indexer: %w", err)
		}
		if err := indexer.Build(nil); err != nil {
			return fmt.Errorf("build index: %w", err)
		}
		index := indexer.Index()
		if len(index.ESReaders) > 0 {
			a.reader.SetESReader(index.ESReaders[0])
		}
		// Store index for cleanup in Close()
		a.index = index
	} else if isNetworkFS(sourceDir) {
		// Network FS: use pread with retry instead of mmap to avoid SIGBUS.
		if err := a.reader.LoadSourceFilesPread(a.readTimeout); err != nil {
			return fmt.Errorf("load source files (pread): %w", err)
		}
	} else {
		// Local FS: mmap for zero-copy performance.
		// Range maps handle ES-to-raw translation at read time.
		if err := a.reader.LoadSourceFiles(); err != nil {
			return fmt.Errorf("load source files: %w", err)
		}
	}
	return nil
}
// SourceFileInfo returns metadata for the reader's source files. When the
// dedup file records per-source used flags, files marked unused are omitted.
func (a *dedupReaderAdapter) SourceFileInfo() []SourceFileInfo {
	filterUnused := a.reader.HasSourceUsedFlags()
	var out []SourceFileInfo
	for _, sf := range a.reader.SourceFiles() {
		if filterUnused && !sf.Used {
			continue
		}
		out = append(out, SourceFileInfo{
			RelativePath: sf.RelativePath,
			Size:         sf.Size,
			Checksum:     sf.Checksum,
		})
	}
	return out
}
// ReadAt delegates the positional read to the underlying dedup reader.
func (a *dedupReaderAdapter) ReadAt(p []byte, off int64) (n int, err error) {
	return a.reader.ReadAt(p, off)
}
// Close releases the dedup reader and, when one was built, the source
// index. Both are closed even if the first close fails; the first error
// encountered is returned.
func (a *dedupReaderAdapter) Close() error {
	var firstErr error
	if err := a.reader.Close(); err != nil {
		firstErr = err
	}
	if a.index != nil {
		if err := a.index.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// DefaultReaderFactory is the default implementation of ReaderFactory.
type DefaultReaderFactory struct {
	// ReadTimeout is the pread timeout applied when source files live on a
	// network filesystem; it is passed through to each reader adapter.
	ReadTimeout time.Duration
}
// NewReaderLazy opens a lazily-initialized dedup reader for dedupPath
// backed by sources in sourceDir, after validating ownership/permissions
// on both paths. Do not reorder the resolve → check → open sequence: it is
// what closes the TOCTOU window described below.
func (f *DefaultReaderFactory) NewReaderLazy(dedupPath, sourceDir string) (ReaderInitializer, error) {
	// When running as root, resolve symlinks once and use the canonical
	// paths for both security checks and subsequent opens. This closes
	// the TOCTOU window where a symlink could be swapped between the
	// ownership check and the actual open/mmap. We use the Resolved
	// variants to avoid redundant EvalSymlinks calls inside the
	// security functions.
	if security.Geteuid() == 0 {
		resolved, err := filepath.EvalSymlinks(dedupPath)
		if err != nil {
			return nil, fmt.Errorf("resolve dedup path %s: %w", dedupPath, err)
		}
		dedupPath = resolved
		resolved, err = filepath.EvalSymlinks(sourceDir)
		if err != nil {
			return nil, fmt.Errorf("resolve source dir %s: %w", sourceDir, err)
		}
		sourceDir = resolved
	}
	if err := security.CheckFileOwnershipResolved(dedupPath); err != nil {
		return nil, fmt.Errorf("dedup file %s: %w", dedupPath, err)
	}
	if err := security.CheckDirectoryResolved(sourceDir); err != nil {
		return nil, fmt.Errorf("source dir %s: %w", sourceDir, err)
	}
	reader, err := dedup.NewReaderLazy(dedupPath, sourceDir)
	if err != nil {
		return nil, err
	}
	return &dedupReaderAdapter{reader: reader, readTimeout: f.ReadTimeout}, nil
}
// DefaultConfigReader is the default implementation of ConfigReader,
// delegating to dedup.ReadConfig.
type DefaultConfigReader struct{}
// ReadConfig loads a dedup config file from path and converts it to the
// fuse package's Config representation.
func (r *DefaultConfigReader) ReadConfig(path string) (*Config, error) {
	c, err := dedup.ReadConfig(path)
	if err != nil {
		return nil, err
	}
	out := &Config{
		Name:      c.Name,
		DedupFile: c.DedupFile,
		SourceDir: c.SourceDir,
	}
	return out, nil
}
package fuse
import (
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/fsnotify/fsnotify"
)
// configDebounceDelay is the time to wait after the last config file event
// before triggering the action. This coalesces rapid changes from editors
// that write to a temp file and then rename (atomic save), so a single
// save produces a single warn/reload instead of several.
const configDebounceDelay = 500 * time.Millisecond
// ConfigWatcher monitors config files for changes and either logs a warning
// or triggers a reload callback. It uses inotify for local filesystems and
// falls back to polling for network filesystems (NFS, CIFS/SMB).
type ConfigWatcher struct {
	watcher *fsnotify.Watcher // inotify-based watcher for local directories
	// configFiles is the set of absolute config file paths being watched.
	configFiles map[string]bool
	// pollFiles maps absolute config file paths to their last known mtime
	// for directories that use polling instead of inotify.
	pollFiles    map[string]time.Time
	action       string                       // "reload" or "warn"
	reloadFn     func()                       // invoked when action == "reload"
	logFn        func(string, ...interface{}) // never nil; defaults to a no-op in NewConfigWatcher
	pollInterval time.Duration                // tick interval for pollLoop
	mu           sync.Mutex                   // guards configFiles and pollFiles
	stopCh       chan struct{}                // closed by Stop to end both loops
	wg           sync.WaitGroup               // tracks eventLoop and pollLoop goroutines
}
// NewConfigWatcher creates a new config file watcher with the given action.
// action must be "reload" or "warn". If pollInterval <= 0,
// defaultPollInterval is used; if logFn is nil, logging is a no-op.
// The watcher is not started until Start() is called.
func NewConfigWatcher(action string, pollInterval time.Duration, reloadFn func(), logFn func(string, ...interface{})) (*ConfigWatcher, error) {
	if action != "reload" && action != "warn" {
		return nil, fmt.Errorf("invalid config watch action %q (must be reload or warn)", action)
	}
	fsw, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, err
	}
	if pollInterval <= 0 {
		pollInterval = defaultPollInterval
	}
	if logFn == nil {
		logFn = func(string, ...interface{}) {}
	}
	cw := &ConfigWatcher{
		watcher:      fsw,
		configFiles:  make(map[string]bool),
		pollFiles:    make(map[string]time.Time),
		action:       action,
		reloadFn:     reloadFn,
		logFn:        logFn,
		pollInterval: pollInterval,
		stopCh:       make(chan struct{}),
	}
	return cw, nil
}
// Update replaces the set of watched config files. It removes old watches
// and sets up new ones. Called on mount and after reload.
//
// Local-FS directories are watched via fsnotify on the parent directory;
// network-FS directories fall back to mtime polling (see pollLoop).
// fsnotify watcher methods are thread-safe, so cw.mu only guards the
// shared maps, never the watcher calls.
func (cw *ConfigWatcher) Update(configPaths []string) {
	// Build new file set and directory set.
	newFiles := make(map[string]bool, len(configPaths))
	watchDirs := make(map[string]bool)
	for _, p := range configPaths {
		abs, err := filepath.Abs(p)
		if err != nil {
			cw.logFn("config-watch: warning: cannot resolve %s: %v", p, err)
			continue
		}
		newFiles[abs] = true
		watchDirs[filepath.Dir(abs)] = true
	}
	cw.mu.Lock()
	// Record the directories of previously watched files so their inotify
	// watches can be removed outside the lock below.
	oldDirs := make(map[string]bool)
	for f := range cw.configFiles {
		oldDirs[filepath.Dir(f)] = true
	}
	cw.configFiles = newFiles
	// Cleared here; repopulated further down only for network-FS paths.
	cw.pollFiles = make(map[string]time.Time)
	cw.mu.Unlock()
	// Remove old watches (fsnotify methods are thread-safe).
	for dir := range oldDirs {
		cw.watcher.Remove(dir)
	}
	// Precompute files per directory for efficient poll setup.
	pathsByDir := make(map[string][]string)
	for f := range newFiles {
		dir := filepath.Dir(f)
		pathsByDir[dir] = append(pathsByDir[dir], f)
	}
	// Set up new watches.
	newPollFiles := make(map[string]time.Time)
	for dir := range watchDirs {
		if isNetworkFS(dir) {
			cw.logFn("config-watch: %s is on a network filesystem, using polling", dir)
			for _, absPath := range pathsByDir[dir] {
				if info, err := os.Stat(absPath); err == nil {
					newPollFiles[absPath] = info.ModTime()
				} else {
					// Zero time marks "currently missing/unreadable";
					// pollCheck treats a later successful stat as a change.
					newPollFiles[absPath] = time.Time{}
				}
			}
		} else {
			if err := cw.watcher.Add(dir); err != nil {
				cw.logFn("config-watch: warning: cannot watch %s: %v", dir, err)
			}
		}
	}
	if len(newPollFiles) > 0 {
		cw.mu.Lock()
		cw.pollFiles = newPollFiles
		cw.mu.Unlock()
	}
	cw.logFn("config-watch: monitoring %d config files in %d directories (action=%s)",
		len(newFiles), len(watchDirs), cw.action)
}
// Start begins the event processing loops. Must be called after Update().
func (cw *ConfigWatcher) Start() {
	cw.wg.Add(2)
	go cw.eventLoop()
	go cw.pollLoop()
}
// Stop stops the watcher and waits for goroutines to exit. Closing stopCh
// signals both loops; closing the fsnotify watcher additionally closes its
// Events/Errors channels, which eventLoop detects via the !ok receive.
func (cw *ConfigWatcher) Stop() {
	close(cw.stopCh)
	cw.watcher.Close()
	cw.wg.Wait()
}
// eventLoop processes fsnotify events with debouncing: each relevant event
// on a tracked file resets a single reusable timer, and the action fires
// only once the timer expires with no further events.
func (cw *ConfigWatcher) eventLoop() {
	defer cw.wg.Done()
	// Single timer reused across events. Starts stopped; Reset activates it.
	debounceTimer := time.NewTimer(0)
	if !debounceTimer.Stop() {
		<-debounceTimer.C
	}
	defer debounceTimer.Stop()
	for {
		select {
		case event, ok := <-cw.watcher.Events:
			if !ok {
				// Events channel closed by watcher.Close() — shut down.
				return
			}
			// Only content-affecting operations are relevant; Chmod etc.
			// are ignored.
			if event.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Rename|fsnotify.Remove) == 0 {
				continue
			}
			// Check if this event is for a tracked config file (we watch
			// whole directories, so unrelated files also produce events).
			cw.mu.Lock()
			tracked := cw.configFiles[event.Name]
			cw.mu.Unlock()
			if !tracked {
				continue
			}
			// Reset debounce timer — drain channel if Stop reports
			// the timer already fired to prevent a stale tick.
			if !debounceTimer.Stop() {
				select {
				case <-debounceTimer.C:
				default:
				}
			}
			debounceTimer.Reset(configDebounceDelay)
		case <-debounceTimer.C:
			// Guard against select choosing the timer case when stopCh
			// is also ready — prefer shutdown over triggering a reload.
			select {
			case <-cw.stopCh:
				return
			default:
				cw.triggerAction()
			}
		case err, ok := <-cw.watcher.Errors:
			if !ok {
				return
			}
			cw.logFn("config-watch: watcher error: %v", err)
		case <-cw.stopCh:
			return
		}
	}
}
// pollLoop periodically checks config files on network filesystems until
// the watcher is stopped.
func (cw *ConfigWatcher) pollLoop() {
	defer cw.wg.Done()
	ticker := time.NewTicker(cw.pollInterval)
	defer ticker.Stop()
	for {
		select {
		case <-cw.stopCh:
			return
		case <-ticker.C:
		}
		// Prefer shutdown if stop was signaled alongside a tick.
		select {
		case <-cw.stopCh:
			return
		default:
		}
		cw.pollCheck()
	}
}
// pollCheck stats all poll-monitored config files and triggers action if
// any have changed.
//
// The poll set is snapshotted under the lock, stat calls run without the
// lock (they may block on a slow network mount), and mtime updates are
// applied under the lock again — re-checking membership in case Update()
// replaced the map in between.
func (cw *ConfigWatcher) pollCheck() {
	type polledFile struct {
		path      string
		lastMtime time.Time
	}
	cw.mu.Lock()
	snapshot := make([]polledFile, 0, len(cw.pollFiles))
	for absPath, lastMtime := range cw.pollFiles {
		snapshot = append(snapshot, polledFile{path: absPath, lastMtime: lastMtime})
	}
	cw.mu.Unlock()
	type mtimeUpdate struct {
		path     string
		newMtime time.Time
	}
	var updates []mtimeUpdate
	changed := false
	for _, pf := range snapshot {
		info, err := os.Stat(pf.path)
		if err != nil {
			cw.logFn("config-watch: poll: cannot stat %s: %v", pf.path, err)
			// Only treat as a change on transition into error/missing state.
			// This prevents repeated reload triggers every poll tick when a
			// config file is persistently unreachable.
			if !pf.lastMtime.IsZero() {
				updates = append(updates, mtimeUpdate{path: pf.path, newMtime: time.Time{}})
				changed = true
			}
			continue
		}
		// time.Time.Equal is the correct comparison (not ==).
		if !info.ModTime().Equal(pf.lastMtime) {
			updates = append(updates, mtimeUpdate{path: pf.path, newMtime: info.ModTime()})
			changed = true
		}
	}
	if len(updates) > 0 {
		cw.mu.Lock()
		for _, u := range updates {
			// Apply only if the path is still being polled — Update() may
			// have swapped the map while we were stat'ing.
			if _, ok := cw.pollFiles[u.path]; ok {
				cw.pollFiles[u.path] = u.newMtime
			}
		}
		cw.mu.Unlock()
	}
	if changed {
		cw.triggerAction()
	}
}
// triggerAction executes the configured action (warn or reload).
// Unknown action values are silently ignored.
func (cw *ConfigWatcher) triggerAction() {
	if cw.action == "reload" {
		cw.logFn("config-watch: config file changed, triggering reload")
		cw.reloadFn()
		return
	}
	if cw.action == "warn" {
		cw.logFn("config-watch: config file changed (action=warn)")
	}
}
// Package fuse provides a FUSE filesystem for accessing deduplicated MKV files.
package fuse
import (
"sync"
"sync/atomic"
"github.com/hanwen/go-fuse/v2/fs"
)
// MKVFile represents a virtual MKV file backed by a dedup file.
// Instances are shared between the flat lookup map and the directory
// tree; reader/disabled state is guarded by mu.
type MKVFile struct {
	Name      string // full virtual path (map key in MKVFSRoot.files; may contain "/")
	DedupPath string // path to the backing dedup file
	SourceDir string // source directory used to reconstruct original data
	Size      int64  // original (pre-dedup) size in bytes, from the reader header
	reader    DedupReader  // opened lazily in ensureReader; nil until first Open
	mu        sync.RWMutex // guards reader and disabled
	// disabled is set when a source file change is detected and the
	// configured action is "disable" or "checksum" (with mismatch).
	// When true, Open/Read return EIO. Reset to false on reload.
	disabled bool
	// Factory for lazy initialization (injected from root)
	readerFactory ReaderFactory
}
// MKVFSRoot is the root node of the FUSE filesystem.
type MKVFSRoot struct {
	fs.Inode
	// Directory tree for hierarchical file organization
	rootDir *MKVFSDirNode
	// Flat map for O(1) lookup by full path (kept for backwards compatibility)
	files map[string]*MKVFile
	// mu guards the files map.
	mu      sync.RWMutex
	verbose bool // enables debug logging
	// mounted is set to true after fs.Mount() succeeds. FUSE kernel
	// notifications (NotifyDelete, NotifyEntry, NotifyContent) are only
	// safe to call when the filesystem is mounted — the go-fuse bridge
	// is nil before mount, causing panics.
	mounted atomic.Bool
	// Factories for dependency injection (allows mocking in tests)
	readerFactory ReaderFactory
	configReader  ConfigReader
	// Permission store for chmod/chown support
	permStore *PermissionStore
}
// MKVFSNode represents a file node in the FUSE filesystem.
// It wraps a shared *MKVFile; per-file synchronization lives on file.mu.
type MKVFSNode struct {
	fs.Inode
	file      *MKVFile
	path      string // full path for permission lookups
	verbose   bool   // enables debug logging
	permStore *PermissionStore
}
// MKVFSDirNode represents a directory node in the FUSE filesystem.
// mu guards the files and subdirs maps.
type MKVFSDirNode struct {
	fs.Inode
	name    string                   // basename (e.g., "Action")
	path    string                   // full path from root (e.g., "Movies/Action")
	files   map[string]*MKVFile      // files directly in this directory
	subdirs map[string]*MKVFSDirNode // child directories
	mu      sync.RWMutex
	verbose bool // enables debug logging
	// Factory for creating file nodes (injected from root)
	readerFactory ReaderFactory
	// Permission store for chmod/chown support
	permStore *PermissionStore
}
// Compile-time assertions that each node type satisfies the go-fuse
// interfaces it is expected to implement.
var (
	_ fs.InodeEmbedder = (*MKVFSRoot)(nil)
	_ fs.InodeEmbedder = (*MKVFSNode)(nil)
	_ fs.InodeEmbedder = (*MKVFSDirNode)(nil)
	_ fs.NodeReaddirer = (*MKVFSRoot)(nil)
	_ fs.NodeLookuper  = (*MKVFSRoot)(nil)
	_ fs.NodeGetattrer = (*MKVFSRoot)(nil)
	_ fs.NodeReaddirer = (*MKVFSDirNode)(nil)
	_ fs.NodeLookuper  = (*MKVFSDirNode)(nil)
	_ fs.NodeGetattrer = (*MKVFSDirNode)(nil)
	_ fs.NodeMkdirer   = (*MKVFSDirNode)(nil)
	_ fs.NodeRmdirer   = (*MKVFSDirNode)(nil)
	_ fs.NodeUnlinker  = (*MKVFSDirNode)(nil)
	_ fs.NodeCreater   = (*MKVFSDirNode)(nil)
	_ fs.NodeOpener    = (*MKVFSNode)(nil)
	_ fs.NodeReader    = (*MKVFSNode)(nil)
	_ fs.NodeGetattrer = (*MKVFSNode)(nil)
	_ fs.NodeSetattrer = (*MKVFSNode)(nil)
	_ fs.NodeSetattrer = (*MKVFSDirNode)(nil)
)
// getFilePerms returns file permissions from the store, or defaults if store is nil.
func getFilePerms(store *PermissionStore, path string) (uid, gid, mode uint32) {
	if store == nil {
		// Default: root-owned, world-readable regular file.
		return 0, 0, 0444
	}
	return store.GetFilePerms(path)
}
// getDirPerms returns directory permissions from the store, or defaults if store is nil.
func getDirPerms(store *PermissionStore, path string) (uid, gid, mode uint32) {
	if store == nil {
		// Default: root-owned, world-listable directory.
		return 0, 0, 0555
	}
	return store.GetDirPerms(path)
}
package fuse
import (
"context"
"log"
"sort"
"syscall"
"time"
"github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
)
// --- MKVFSDirNode interface implementations ---
// Readdir implements fs.NodeReaddirer - lists files and subdirectories.
// It is a thin delegate to readdirInternal, which does the actual work.
func (d *MKVFSDirNode) Readdir(ctx context.Context) (fs.DirStream, syscall.Errno) {
	// Permission checks are handled by the kernel via default_permissions mount option.
	return d.readdirInternal(ctx)
}
// readdirInternal builds the directory listing: subdirectories first, then
// files, each group sorted by name for deterministic output. It performs no
// permission checks itself (those are handled by the kernel via the
// default_permissions mount option) and is shared by both MKVFSRoot.Readdir
// and MKVFSDirNode.Readdir.
func (d *MKVFSDirNode) readdirInternal(ctx context.Context) (fs.DirStream, syscall.Errno) {
	d.mu.RLock()
	defer d.mu.RUnlock()
	if d.verbose {
		log.Printf("Readdir: %s (files=%d, subdirs=%d)", d.path, len(d.files), len(d.subdirs))
	}
	// Sort both name sets up front so the listing order is stable.
	dirNames := make([]string, 0, len(d.subdirs))
	for name := range d.subdirs {
		dirNames = append(dirNames, name)
	}
	sort.Strings(dirNames)
	fileNames := make([]string, 0, len(d.files))
	for name := range d.files {
		fileNames = append(fileNames, name)
	}
	sort.Strings(fileNames)
	entries := make([]fuse.DirEntry, 0, len(dirNames)+len(fileNames))
	for _, name := range dirNames {
		if d.verbose {
			log.Printf("Readdir: adding subdir %s", name)
		}
		entries = append(entries, fuse.DirEntry{Name: name, Mode: fuse.S_IFDIR})
	}
	for _, name := range fileNames {
		if d.verbose {
			log.Printf("Readdir: adding file %s", name)
		}
		entries = append(entries, fuse.DirEntry{Name: name, Mode: fuse.S_IFREG})
	}
	return fs.NewListDirStream(entries), 0
}
// Lookup implements fs.NodeLookuper - looks up a file or subdirectory by name.
// Subdirectories are checked before files; on a hit, the entry's attributes
// are filled and a child inode is returned. Timestamps are synthesized from
// the current time on every lookup (this FS has no stored mtimes).
func (d *MKVFSDirNode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) {
	// Permission checks are handled by the kernel via default_permissions mount option.
	d.mu.RLock()
	defer d.mu.RUnlock()
	// Check subdirectories first
	if subdir, ok := d.subdirs[name]; ok {
		if d.verbose {
			log.Printf("Lookup: found subdir %s in %s", name, d.path)
		}
		// Lock subdir to safely access its fields
		subdir.mu.RLock()
		subdirCount := len(subdir.subdirs)
		subdir.mu.RUnlock()
		uid, gid, mode := getDirPerms(d.permStore, subdir.path)
		now := time.Now()
		out.Mode = fuse.S_IFDIR | mode
		out.Uid = uid
		out.Gid = gid
		out.Atime = uint64(now.Unix())
		out.Mtime = uint64(now.Unix())
		out.Ctime = uint64(now.Unix())
		// Unix convention: "." and ".." plus one link per child directory.
		out.Nlink = 2 + uint32(subdirCount)
		// Stable inode number derived from the path so the same directory
		// always maps to the same ino across lookups.
		stable := fs.StableAttr{
			Mode: fuse.S_IFDIR,
			Ino:  hashString(subdir.path),
		}
		// Persistent inode: directories are kept registered with go-fuse so
		// later kernel notifications can reference them.
		child := d.NewPersistentInode(ctx, subdir, stable)
		return child, 0
	}
	// Check files
	if file, ok := d.files[name]; ok {
		if d.verbose {
			log.Printf("Lookup: found file %s in %s (size=%d)", name, d.path, file.Size)
		}
		// Build the full path for permission lookups (root has empty path).
		var filePath string
		if d.path == "" {
			filePath = name
		} else {
			filePath = d.path + "/" + name
		}
		uid, gid, mode := getFilePerms(d.permStore, filePath)
		now := time.Now()
		out.Size = uint64(file.Size)
		out.Mode = fuse.S_IFREG | mode
		out.Uid = uid
		out.Gid = gid
		out.Atime = uint64(now.Unix())
		out.Mtime = uint64(now.Unix())
		out.Ctime = uint64(now.Unix())
		out.Nlink = 1
		node := &MKVFSNode{file: file, path: filePath, verbose: d.verbose, permStore: d.permStore}
		stable := fs.StableAttr{
			Mode: fuse.S_IFREG,
			Ino:  hashString(filePath),
		}
		// Note: files use NewInode (non-persistent), unlike directories above,
		// so file nodes can be released by the kernel when forgotten.
		child := d.NewInode(ctx, node, stable)
		return child, 0
	}
	if d.verbose {
		log.Printf("Lookup: not found %s in %s", name, d.path)
	}
	return nil, syscall.ENOENT
}
// Getattr implements fs.NodeGetattrer - returns directory attributes.
// Timestamps are synthesized from the current time; Nlink follows the Unix
// convention of 2 ("." and "..") plus one per child directory.
func (d *MKVFSDirNode) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno {
	d.mu.RLock()
	defer d.mu.RUnlock()
	uid, gid, mode := getDirPerms(d.permStore, d.path)
	ts := uint64(time.Now().Unix())
	out.Mode = fuse.S_IFDIR | mode
	out.Uid = uid
	out.Gid = gid
	out.Atime = ts
	out.Mtime = ts
	out.Ctime = ts
	out.Nlink = 2 + uint32(len(d.subdirs))
	return 0
}
// Setattr implements fs.NodeSetattrer - handles chmod/chown on directories.
// Only ownership (uid/gid) and mode changes are accepted; everything else
// (truncate, utimes, ...) fails with EROFS. Changes are persisted via the
// permission store. Mirrors MKVFSNode.Setattr for files.
func (d *MKVFSDirNode) Setattr(ctx context.Context, fh fs.FileHandle, in *fuse.SetAttrIn, out *fuse.AttrOut) syscall.Errno {
	if d.permStore == nil {
		// No permission store - can't change permissions
		return syscall.EROFS
	}
	// Only UID, GID, and mode changes are supported. All other setattr operations
	// (e.g. size truncation, atime/mtime updates) must fail on this read-only FS.
	supportedMask := uint32(fuse.FATTR_UID | fuse.FATTR_GID | fuse.FATTR_MODE)
	if in.Valid&^supportedMask != 0 {
		return syscall.EROFS
	}
	// Get current permissions and caller
	dirUID, dirGID, dirMode := getDirPerms(d.permStore, d.path)
	caller, ok := GetCaller(ctx)
	if !ok {
		// Caller identity unavailable — cannot authorize the change.
		return syscall.EACCES
	}
	// nil pointer means "leave this attribute unchanged".
	var newUID, newGID, newMode *uint32
	// Check which fields are being changed
	if in.Valid&fuse.FATTR_UID != 0 {
		newUID = &in.Uid
	}
	if in.Valid&fuse.FATTR_GID != 0 {
		newGID = &in.Gid
	}
	if in.Valid&fuse.FATTR_MODE != 0 {
		mode := in.Mode & 0777 // Only permission bits
		newMode = &mode
	}
	// Normalize no-op changes to nil to avoid unnecessary disk writes
	if newUID != nil && *newUID == dirUID {
		newUID = nil
	}
	if newGID != nil && *newGID == dirGID {
		newGID = nil
	}
	if newMode != nil && *newMode == dirMode {
		newMode = nil
	}
	// Permission checks for chown
	if newUID != nil || newGID != nil {
		if errno := CheckChown(caller, dirUID, dirGID, newUID, newGID); errno != 0 {
			if d.verbose {
				log.Printf("Setattr: chown permission denied for %s (caller uid=%d)", d.path, caller.Uid)
			}
			return errno
		}
	}
	// Permission checks for chmod
	if newMode != nil {
		if errno := CheckChmod(caller, dirUID); errno != 0 {
			if d.verbose {
				log.Printf("Setattr: chmod permission denied for %s (caller uid=%d)", d.path, caller.Uid)
			}
			return errno
		}
	}
	// Update permission store (persists to disk)
	if err := d.permStore.SetDirPerms(d.path, newUID, newGID, newMode); err != nil {
		if d.verbose {
			log.Printf("Setattr error: %s: %v", d.path, err)
		}
		return syscall.EIO
	}
	if d.verbose {
		log.Printf("Setattr: %s uid=%v gid=%v mode=%v", d.path, newUID, newGID, newMode)
	}
	// Return updated attributes
	return d.Getattr(ctx, fh, out)
}
// --- Read-only filesystem error handlers ---
// These return EROFS (Read-only file system) for write operations.
// Mkdir implements fs.NodeMkdirer - rejects directory creation.
func (d *MKVFSDirNode) Mkdir(ctx context.Context, name string, mode uint32, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) {
	if d.verbose {
		log.Printf("Mkdir: rejected (read-only) %s in %s", name, d.path)
	}
	return nil, syscall.EROFS
}
// Rmdir implements fs.NodeRmdirer - rejects directory removal.
func (d *MKVFSDirNode) Rmdir(ctx context.Context, name string) syscall.Errno {
	if d.verbose {
		log.Printf("Rmdir: rejected (read-only) %s in %s", name, d.path)
	}
	return syscall.EROFS
}
// Unlink implements fs.NodeUnlinker - rejects file deletion.
func (d *MKVFSDirNode) Unlink(ctx context.Context, name string) syscall.Errno {
	if d.verbose {
		log.Printf("Unlink: rejected (read-only) %s in %s", name, d.path)
	}
	return syscall.EROFS
}
// Create implements fs.NodeCreater - rejects file creation.
func (d *MKVFSDirNode) Create(ctx context.Context, name string, flags uint32, mode uint32, out *fuse.EntryOut) (node *fs.Inode, fh fs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
	if d.verbose {
		log.Printf("Create: rejected (read-only) %s in %s", name, d.path)
	}
	return nil, nil, 0, syscall.EROFS
}
package fuse
import (
"fmt"
"log"
"path/filepath"
"sync"
"github.com/stuckj/mkvdup/internal/dedup"
)
// MKVFSOptions contains options for creating an MKVFS filesystem.
type MKVFSOptions struct {
	Verbose bool // enables debug logging
	// PermissionsPath, when non-empty, enables the permission store
	// (chmod/chown persistence) backed by this file path.
	PermissionsPath string
	// Defaults holds the default permissions to use when a PermissionStore is configured.
	// If nil, DefaultPerms() is used. Set to a non-nil value to use specific defaults.
	// Note: explicit-zero defaults only work when provided programmatically here;
	// they are not persisted to or loaded from the permissions YAML file.
	Defaults *Defaults
}
// NewMKVFS creates a new MKVFS root from a list of config files.
// Config files are resolved recursively (includes and virtual_files are expanded).
// Set verbose=true to enable debug logging.
func NewMKVFS(configPaths []string, verbose bool) (*MKVFSRoot, error) {
	resolved, _, _, err := dedup.ResolveConfigs(configPaths)
	if err != nil {
		return nil, fmt.Errorf("resolve configs: %w", err)
	}
	// No permission store: the mount exposes default read-only permissions.
	return NewMKVFSFromConfigs(resolved, verbose, &DefaultReaderFactory{}, nil)
}
// NewMKVFSWithPermissions creates a new MKVFS root with a permission store.
// Config files are resolved recursively (includes and virtual_files are expanded).
func NewMKVFSWithPermissions(configPaths []string, verbose bool, permStore *PermissionStore) (*MKVFSRoot, error) {
	resolved, _, _, err := dedup.ResolveConfigs(configPaths)
	if err != nil {
		return nil, fmt.Errorf("resolve configs: %w", err)
	}
	return NewMKVFSFromConfigs(resolved, verbose, &DefaultReaderFactory{}, permStore)
}
// NewMKVFSWithOptions creates a new MKVFS root with the given options.
// Config files are resolved recursively (includes and virtual_files are expanded).
// The permission store (if configured) is loaded before config resolution,
// so a broken permissions file fails fast.
func NewMKVFSWithOptions(configPaths []string, opts MKVFSOptions) (*MKVFSRoot, error) {
	var permStore *PermissionStore
	if opts.PermissionsPath != "" {
		// Pick explicit defaults when provided, else the package defaults.
		perms := DefaultPerms()
		if opts.Defaults != nil {
			perms = *opts.Defaults
		}
		permStore = NewPermissionStore(opts.PermissionsPath, perms, opts.Verbose)
		if err := permStore.Load(); err != nil {
			return nil, fmt.Errorf("load permissions: %w", err)
		}
	}
	resolved, _, _, err := dedup.ResolveConfigs(configPaths)
	if err != nil {
		return nil, fmt.Errorf("resolve configs: %w", err)
	}
	return NewMKVFSFromConfigs(resolved, opts.Verbose, &DefaultReaderFactory{}, permStore)
}
// NewMKVFSWithFactories creates a new MKVFS root with custom factories.
// This allows injecting mock implementations for testing.
//
// Unlike NewMKVFSFromConfigs, configs are read via the injected ConfigReader
// and relative dedup/source paths are resolved against each config file's
// directory. Dedup headers are read sequentially.
func NewMKVFSWithFactories(configPaths []string, verbose bool, readerFactory ReaderFactory, configReader ConfigReader, permStore *PermissionStore) (*MKVFSRoot, error) {
	root := &MKVFSRoot{
		files:         make(map[string]*MKVFile),
		verbose:       verbose,
		readerFactory: readerFactory,
		configReader:  configReader,
		permStore:     permStore,
	}
	if verbose {
		log.Printf("Creating MKVFS with %d config files", len(configPaths))
	}
	for _, configPath := range configPaths {
		if verbose {
			log.Printf("Reading config: %s", configPath)
		}
		config, err := root.configReader.ReadConfig(configPath)
		if err != nil {
			return nil, fmt.Errorf("read config %s: %w", configPath, err)
		}
		if verbose {
			log.Printf("Config: name=%s, dedup=%s, source=%s", config.Name, config.DedupFile, config.SourceDir)
		}
		// Resolve relative paths against the config file's directory.
		configDir := filepath.Dir(configPath)
		dedupPath := config.DedupFile
		if !filepath.IsAbs(dedupPath) {
			dedupPath = filepath.Join(configDir, dedupPath)
		}
		sourceDir := config.SourceDir
		if !filepath.IsAbs(sourceDir) {
			sourceDir = filepath.Join(configDir, sourceDir)
		}
		// Open dedup file to get size (lazy loading - only reads header)
		if verbose {
			log.Printf("Opening dedup file: %s", dedupPath)
		}
		reader, err := root.readerFactory.NewReaderLazy(dedupPath, sourceDir)
		if err != nil {
			if verbose {
				log.Printf("Failed to open dedup file: %v", err)
			}
			return nil, fmt.Errorf("open dedup file %s: %w", dedupPath, err)
		}
		mkvFile := &MKVFile{
			Name:          config.Name,
			DedupPath:     dedupPath,
			SourceDir:     sourceDir,
			Size:          reader.OriginalSize(),
			readerFactory: root.readerFactory,
		}
		// Don't keep reader open - we'll open it lazily
		reader.Close()
		// Warn on duplicate names (consistent with Reload): the later config
		// silently wins in the map, which is almost always a config mistake.
		if prev, dup := root.files[config.Name]; dup {
			log.Printf("Warning: duplicate name %q (dedup: %s replaced by %s)", config.Name, prev.DedupPath, dedupPath)
		}
		root.files[config.Name] = mkvFile
		if verbose {
			log.Printf("Added file: %s (size=%d)", config.Name, mkvFile.Size)
		}
	}
	if verbose {
		log.Printf("Total files: %d", len(root.files))
	}
	// Build directory tree from collected files
	fileList := make([]*MKVFile, 0, len(root.files))
	for _, f := range root.files {
		fileList = append(fileList, f)
	}
	root.rootDir = BuildDirectoryTree(fileList, verbose, readerFactory, permStore)
	// Clean up stale permission entries if we have a permission store
	if permStore != nil {
		validFiles, validDirs := root.collectValidPaths()
		removed := permStore.CleanupStale(validFiles, validDirs)
		if removed > 0 {
			if verbose {
				log.Printf("Cleaned up %d stale permission entries", removed)
			}
			if err := permStore.Save(); err != nil {
				log.Printf("Warning: failed to save permissions after cleanup: %v", err)
			}
		}
	}
	if verbose {
		log.Printf("Directory tree built with %d root entries", len(root.rootDir.files)+len(root.rootDir.subdirs))
	}
	return root, nil
}
// maxParallelReaders limits concurrent dedup header reads to avoid
// exhausting file descriptors when mounting thousands of files.
// Used as the worker-pool size in readConfigHeaders and Reload.
const maxParallelReaders = 64
// readConfigHeaders reads dedup file headers in parallel with concurrency
// bounded by maxParallelReaders. It returns a slice of MKVFile (indexed by
// config position) and the first error encountered. On error, no partial
// results are returned and the slice is nil.
func readConfigHeaders(configs []dedup.Config, readerFactory ReaderFactory, verbose bool) ([]*MKVFile, error) {
	// Each worker writes only to its own index, so no lock is needed here.
	results := make([]*MKVFile, len(configs))
	// For small counts, read sequentially to avoid goroutine overhead
	if len(configs) <= 4 {
		for i, config := range configs {
			if verbose {
				log.Printf("Opening dedup file: %s", config.DedupFile)
			}
			reader, err := readerFactory.NewReaderLazy(config.DedupFile, config.SourceDir)
			if err != nil {
				return nil, fmt.Errorf("open dedup file %s: %w", config.DedupFile, err)
			}
			results[i] = &MKVFile{
				Name:          config.Name,
				DedupPath:     config.DedupFile,
				SourceDir:     config.SourceDir,
				Size:          reader.OriginalSize(),
				readerFactory: readerFactory,
			}
			// Header read is enough; the reader is re-opened lazily later.
			reader.Close()
		}
		return results, nil
	}
	var (
		wg    sync.WaitGroup
		errMu sync.Mutex // guards first
		first error      // first error observed by any worker
	)
	// Fixed-size worker pool pulling jobs from a channel.
	numWorkers := maxParallelReaders
	if len(configs) < numWorkers {
		numWorkers = len(configs)
	}
	jobs := make(chan int)
	wg.Add(numWorkers)
	for range numWorkers {
		go func() {
			defer wg.Done()
			for idx := range jobs {
				// Skip work if another worker already failed,
				// but keep draining jobs to avoid deadlocking the sender.
				errMu.Lock()
				failed := first != nil
				errMu.Unlock()
				if failed {
					continue
				}
				cfg := configs[idx]
				reader, err := readerFactory.NewReaderLazy(cfg.DedupFile, cfg.SourceDir)
				if err != nil {
					errMu.Lock()
					if first == nil {
						first = fmt.Errorf("open dedup file %s: %w", cfg.DedupFile, err)
					}
					errMu.Unlock()
					continue
				}
				results[idx] = &MKVFile{
					Name:          cfg.Name,
					DedupPath:     cfg.DedupFile,
					SourceDir:     cfg.SourceDir,
					Size:          reader.OriginalSize(),
					readerFactory: readerFactory,
				}
				reader.Close()
			}
		}()
	}
	// Feed all indices; the unbuffered channel naturally throttles the
	// producer to the pool's consumption rate.
	for i := range configs {
		jobs <- i
	}
	close(jobs)
	wg.Wait()
	if first != nil {
		// Discard partial results on any failure.
		return nil, first
	}
	return results, nil
}
// NewMKVFSFromConfigs creates a new MKVFS root from already-resolved configs.
// Paths in configs must already be absolute (as returned by dedup.ResolveConfigs).
// Dedup file headers are read in parallel for faster startup with many files.
func NewMKVFSFromConfigs(configs []dedup.Config, verbose bool, readerFactory ReaderFactory, permStore *PermissionStore) (*MKVFSRoot, error) {
	root := &MKVFSRoot{
		files:         make(map[string]*MKVFile),
		verbose:       verbose,
		readerFactory: readerFactory,
		permStore:     permStore,
	}
	if verbose {
		log.Printf("Creating MKVFS with %d resolved configs", len(configs))
	}
	mkvFiles, err := readConfigHeaders(configs, readerFactory, verbose)
	if err != nil {
		return nil, err
	}
	for _, mkvFile := range mkvFiles {
		if mkvFile == nil {
			// Defensive: readConfigHeaders only leaves nil entries on the
			// error path, which returns before we get here.
			continue
		}
		// Warn on duplicate names (consistent with Reload): the later config
		// silently wins in the map, which is almost always a config mistake.
		if prev, dup := root.files[mkvFile.Name]; dup {
			log.Printf("Warning: duplicate name %q (dedup: %s replaced by %s)", mkvFile.Name, prev.DedupPath, mkvFile.DedupPath)
		}
		root.files[mkvFile.Name] = mkvFile
		if verbose {
			log.Printf("Added file: %s (size=%d)", mkvFile.Name, mkvFile.Size)
		}
	}
	if verbose {
		log.Printf("Total files: %d", len(root.files))
	}
	// Build directory tree from collected files
	fileList := make([]*MKVFile, 0, len(root.files))
	for _, f := range root.files {
		fileList = append(fileList, f)
	}
	root.rootDir = BuildDirectoryTree(fileList, verbose, readerFactory, permStore)
	// Clean up stale permission entries if we have a permission store
	if permStore != nil {
		validFiles, validDirs := root.collectValidPaths()
		removed := permStore.CleanupStale(validFiles, validDirs)
		if removed > 0 {
			if verbose {
				log.Printf("Cleaned up %d stale permission entries", removed)
			}
			if err := permStore.Save(); err != nil {
				log.Printf("Warning: failed to save permissions after cleanup: %v", err)
			}
		}
	}
	if verbose {
		log.Printf("Directory tree built with %d root entries", len(root.rootDir.files)+len(root.rootDir.subdirs))
	}
	return root, nil
}
package fuse
import (
"context"
"fmt"
"log"
"syscall"
"time"
"github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
)
// Getattr implements fs.NodeGetattrer - returns file attributes.
// Size comes from the dedup header; timestamps are synthesized from the
// current time (this FS stores no per-file times).
func (n *MKVFSNode) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno {
	uid, gid, mode := getFilePerms(n.permStore, n.path)
	ts := uint64(time.Now().Unix())
	out.Size = uint64(n.file.Size)
	out.Mode = fuse.S_IFREG | mode
	out.Uid = uid
	out.Gid = gid
	out.Atime = ts
	out.Mtime = ts
	out.Ctime = ts
	out.Nlink = 1
	return 0
}
// Setattr implements fs.NodeSetattrer - handles chmod/chown on files.
// Only ownership (uid/gid) and mode changes are accepted; everything else
// (truncate, utimes, ...) fails with EROFS. Changes are persisted via the
// permission store. Mirrors MKVFSDirNode.Setattr for directories.
func (n *MKVFSNode) Setattr(ctx context.Context, fh fs.FileHandle, in *fuse.SetAttrIn, out *fuse.AttrOut) syscall.Errno {
	if n.permStore == nil {
		// No permission store - can't change permissions
		return syscall.EROFS
	}
	// Only UID, GID, and mode changes are supported. All other setattr operations
	// (e.g. size truncation, atime/mtime updates) must fail on this read-only FS.
	supportedMask := uint32(fuse.FATTR_UID | fuse.FATTR_GID | fuse.FATTR_MODE)
	if in.Valid&^supportedMask != 0 {
		return syscall.EROFS
	}
	// Get current permissions and caller
	fileUID, fileGID, fileMode := getFilePerms(n.permStore, n.path)
	caller, ok := GetCaller(ctx)
	if !ok {
		// Caller identity unavailable — cannot authorize the change.
		return syscall.EACCES
	}
	// nil pointer means "leave this attribute unchanged".
	var newUID, newGID, newMode *uint32
	// Check which fields are being changed
	if in.Valid&fuse.FATTR_UID != 0 {
		newUID = &in.Uid
	}
	if in.Valid&fuse.FATTR_GID != 0 {
		newGID = &in.Gid
	}
	if in.Valid&fuse.FATTR_MODE != 0 {
		mode := in.Mode & 0777 // Only permission bits
		newMode = &mode
	}
	// Normalize no-op changes to nil to avoid unnecessary disk writes
	if newUID != nil && *newUID == fileUID {
		newUID = nil
	}
	if newGID != nil && *newGID == fileGID {
		newGID = nil
	}
	if newMode != nil && *newMode == fileMode {
		newMode = nil
	}
	// Permission checks for chown
	if newUID != nil || newGID != nil {
		if errno := CheckChown(caller, fileUID, fileGID, newUID, newGID); errno != 0 {
			if n.verbose {
				log.Printf("Setattr: chown permission denied for %s (caller uid=%d)", n.path, caller.Uid)
			}
			return errno
		}
	}
	// Permission checks for chmod
	if newMode != nil {
		if errno := CheckChmod(caller, fileUID); errno != 0 {
			if n.verbose {
				log.Printf("Setattr: chmod permission denied for %s (caller uid=%d)", n.path, caller.Uid)
			}
			return errno
		}
	}
	// Update permission store (persists to disk)
	if err := n.permStore.SetFilePerms(n.path, newUID, newGID, newMode); err != nil {
		if n.verbose {
			log.Printf("Setattr error: %s: %v", n.path, err)
		}
		return syscall.EIO
	}
	if n.verbose {
		log.Printf("Setattr: %s uid=%v gid=%v mode=%v", n.path, newUID, newGID, newMode)
	}
	// Return updated attributes
	return n.Getattr(ctx, fh, out)
}
// Open implements fs.NodeOpener - opens a file for reading.
// Rejects any write-mode open, returns EIO if the file has been disabled
// (source change detected), and otherwise lazily initializes the dedup reader.
func (n *MKVFSNode) Open(ctx context.Context, flags uint32) (fs.FileHandle, uint32, syscall.Errno) {
	// This is a read-only filesystem - reject any write access or operations
	// that would modify the filesystem. Note: O_RDONLY|O_APPEND is a valid
	// read-only open on Linux (positions at EOF), so we only check access mode.
	accMode := flags & syscall.O_ACCMODE
	if accMode != syscall.O_RDONLY || flags&(syscall.O_TRUNC|syscall.O_CREAT) != 0 {
		return nil, 0, syscall.EROFS
	}
	// Permission checks are handled by the kernel via default_permissions mount option.
	// Check if file was disabled due to source file change
	n.file.mu.RLock()
	disabled := n.file.disabled
	n.file.mu.RUnlock()
	if disabled {
		if n.verbose {
			log.Printf("Open: %s: source file changed, file disabled", n.file.Name)
		}
		return nil, 0, syscall.EIO
	}
	if n.verbose {
		log.Printf("Open: %s", n.file.Name)
	}
	// Initialize reader lazily if needed
	if err := n.ensureReader(); err != nil {
		if n.verbose {
			log.Printf("Open error: %s: %v", n.file.Name, err)
		}
		return nil, 0, syscall.EIO
	}
	// NOTE(review): FOPEN_CACHE_DIR is a directory-open flag in the FUSE
	// protocol; returning it from a regular-file Open looks unintended —
	// confirm against go-fuse docs (FOPEN_KEEP_CACHE alone is typical here).
	return nil, fuse.FOPEN_KEEP_CACHE | fuse.FOPEN_CACHE_DIR, 0
}
// Read implements fs.NodeReader - reads data from the file.
// Reads past EOF return empty data; reads spanning EOF are clamped to the
// file size. Partial reads with an error still return the bytes read.
func (n *MKVFSNode) Read(ctx context.Context, fh fs.FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) {
	// Permission checks are handled by the kernel via default_permissions mount option.
	n.file.mu.RLock()
	defer n.file.mu.RUnlock()
	if n.file.disabled {
		if n.verbose {
			log.Printf("Read error: %s: source file changed, file disabled", n.file.Name)
		}
		return nil, syscall.EIO
	}
	if n.file.reader == nil {
		if n.verbose {
			log.Printf("Read error: %s: reader not initialized", n.file.Name)
		}
		return nil, syscall.EIO
	}
	// Offset at or past EOF: empty result.
	if off >= n.file.Size {
		return fuse.ReadResultData(nil), 0
	}
	// Clamp the request so it never reads past the virtual file size.
	if remaining := n.file.Size - off; int64(len(dest)) > remaining {
		dest = dest[:remaining]
	}
	nRead, err := n.file.reader.ReadAt(dest, off)
	if err != nil && nRead == 0 {
		if n.verbose {
			log.Printf("Read error: %s at offset %d: %v", n.file.Name, off, err)
		}
		return nil, syscall.EIO
	}
	if n.verbose {
		log.Printf("Read: %s offset=%d len=%d read=%d", n.file.Name, off, len(dest), nRead)
	}
	return fuse.ReadResultData(dest[:nRead]), 0
}
// ensureReader ensures the dedup reader is initialized, opening it lazily
// via the injected factory and preparing it for reads.
func (n *MKVFSNode) ensureReader() error {
	n.file.mu.Lock()
	defer n.file.mu.Unlock()
	// Re-check disabled under the write lock: Open's earlier check was done
	// under a separate RLock, so Disable may have run in between. Without
	// this, a racing Open could attach a fresh reader to a disabled file,
	// leaving it open until the next Close despite reads being blocked.
	if n.file.disabled {
		return fmt.Errorf("file disabled: %s", n.file.Name)
	}
	if n.file.reader != nil {
		// Already initialized by a previous Open.
		return nil
	}
	// Open dedup file with lazy loading using the factory
	reader, err := n.file.readerFactory.NewReaderLazy(n.file.DedupPath, n.file.SourceDir)
	if err != nil {
		return fmt.Errorf("open dedup file: %w", err)
	}
	// Initialize the reader for reading (handles ES vs raw internally)
	if err := reader.InitializeForReading(n.file.SourceDir); err != nil {
		reader.Close()
		return fmt.Errorf("initialize reader: %w", err)
	}
	n.file.reader = reader
	return nil
}
// Disable marks the file as disabled (source changed). Subsequent reads
// return EIO. Closes any active reader. Thread-safe.
func (f *MKVFile) Disable() {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.disabled = true
	if r := f.reader; r != nil {
		f.reader = nil
		r.Close()
	}
}
// Enable re-enables a previously disabled file (e.g., after checksum
// verification confirms the source is OK). The reader will be lazily
// re-initialized on next Open.
func (f *MKVFile) Enable() {
	f.mu.Lock()
	f.disabled = false
	f.mu.Unlock()
}
// Close cleans up the file's resources, releasing any open reader.
// Safe to call when no reader is open. Thread-safe.
func (f *MKVFile) Close() {
	f.mu.Lock()
	defer f.mu.Unlock()
	if r := f.reader; r != nil {
		f.reader = nil
		r.Close()
	}
}
// updateFrom copies data fields from src into f. If the underlying dedup file
// changed, any active reader is closed since it's no longer valid.
// The caller must hold f.mu (write lock).
func (f *MKVFile) updateFrom(src *MKVFile) {
	sameBacking := f.DedupPath == src.DedupPath && f.SourceDir == src.SourceDir
	// An open reader for a different backing file is stale — drop it.
	if !sameBacking && f.reader != nil {
		f.reader.Close()
		f.reader = nil
	}
	f.Name = src.Name
	f.DedupPath = src.DedupPath
	f.SourceDir = src.SourceDir
	f.Size = src.Size
	f.readerFactory = src.readerFactory
	// Reset disabled flag — reload re-validates source files
	f.disabled = false
}
// hashString creates a stable inode number from a string using the djb2
// scheme (h = h*33 + c, seeded with 5381). It ranges over runes, so
// multi-byte UTF-8 input hashes by code point rather than by byte.
func hashString(s string) uint64 {
	h := uint64(5381)
	for _, r := range s {
		h = h*33 + uint64(r)
	}
	return h
}
package fuse
import (
"context"
"fmt"
"log"
"path"
"strings"
"sync"
"syscall"
"time"
"github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/stuckj/mkvdup/internal/dedup"
)
// reloadNotification captures a pending FUSE kernel notification to emit
// after all locks are released (go-fuse notifications must not be called
// while holding filesystem locks, as the kernel may call back into the FS).
type reloadNotification struct {
	parent   *fs.Inode // directory inode the notification targets
	child    *fs.Inode // non-nil for deletions (if kernel had cached the inode)
	name     string    // entry name within parent
	isDelete bool      // true → NotifyDelete; false → entry invalidation
}
// findParentInode walks the directory tree to find the parent inode for a
// given file path (e.g., "Movies/Action/film.mkv"). Returns the parent's
// go-fuse Inode and the basename, or (nil, "") if the parent directory
// doesn't exist in the tree.
//
// For root-level files (no directory component), returns r.Inode.
// Caller must NOT hold directory locks — this method acquires them.
func (r *MKVFSRoot) findParentInode(filePath string) (*fs.Inode, string) {
cleaned := path.Clean(filePath)
parts := strings.Split(cleaned, "/")
// Filter empty parts (handles leading slashes)
valid := make([]string, 0, len(parts))
for _, p := range parts {
if p != "" && p != "." {
valid = append(valid, p)
}
}
if len(valid) == 0 {
return nil, ""
}
basename := valid[len(valid)-1]
dirParts := valid[:len(valid)-1]
if len(dirParts) == 0 {
// File is at root level — parent is the root inode
return &r.Inode, basename
}
// Walk directory tree to find parent
current := r.rootDir
for _, part := range dirParts {
current.mu.RLock()
subdir, ok := current.subdirs[part]
current.mu.RUnlock()
if !ok {
return nil, ""
}
current = subdir
}
// Newly created directories from mergeDirectoryTree have uninitialized
// fs.Inode (never registered with go-fuse via NewPersistentInode).
// The kernel doesn't know about them, so notifications would panic.
// Return nil — the kernel will discover the directory via Lookup.
if current.Inode.StableAttr().Ino == 0 {
return nil, ""
}
return ¤t.Inode, basename
}
// markAncestorDirs walks from inode up to (and including) the root,
// adding each ancestor to changedDirs so their readdir caches are
// invalidated. This is necessary because a file addition or removal
// in a deeply nested virtual directory may cause intermediate
// directories to be created or removed by the tree merge.
func markAncestorDirs(inode *fs.Inode, changedDirs map[*fs.Inode]bool) {
	node := inode
	for {
		_, parent := node.Parent()
		if parent == nil {
			// Reached above the root — done.
			return
		}
		if changedDirs[parent] {
			// Already marked, so every ancestor above it must be too.
			return
		}
		changedDirs[parent] = true
		node = parent
	}
}
// Reload updates the filesystem with new configs. It updates existing MKVFile
// objects in place to preserve pointer identity for cached FUSE inodes, and
// merges the directory tree structure (required because go-fuse caches
// persistent inode objects by inode number).
//
// After the merge, FUSE kernel notifications are emitted:
// - NotifyDelete for removed files (sends IN_DELETE to inotify watchers)
// - NotifyEntry for added files (invalidates kernel dentry cache)
// - NotifyContent on changed directories (invalidates readdir cache)
//
// Note: The FUSE protocol has no NOTIFY_CREATE, so added files don't
// generate proactive inotify events. Media servers should use periodic
// scanning in addition to inotify watching.
//
// Semantics:
// - New files become immediately visible
// - Removed files disappear from listings
// - Modified mappings update existing MKVFile objects in place; active readers
// are closed if the underlying dedup path changed (re-opened lazily on next read)
// - Permissions are reloaded from disk and stale entries cleaned up
// (cleanup is skipped if permission reload fails, to avoid overwriting
// a temporarily unreadable permissions file)
func (r *MKVFSRoot) Reload(configs []dedup.Config, logFn func(string, ...interface{})) error {
	// A nil logger is replaced with a no-op so callers may pass nil.
	if logFn == nil {
		logFn = func(string, ...interface{}) {}
	}
	// Build new file set from configs (parallel header reads with soft failure)
	newFiles := make(map[string]*MKVFile)
	type reloadResult struct {
		file *MKVFile
		err  error
	}
	// Each goroutine writes only its own index, so results needs no locking.
	results := make([]reloadResult, len(configs))
	if len(configs) <= 4 {
		// Sequential for small counts
		for i, config := range configs {
			reader, err := r.readerFactory.NewReaderLazy(config.DedupFile, config.SourceDir)
			if err != nil {
				// Soft failure: record the error and keep processing the rest.
				results[i] = reloadResult{err: fmt.Errorf("open dedup file %s: %w", config.DedupFile, err)}
				continue
			}
			results[i] = reloadResult{file: &MKVFile{
				Name:          config.Name,
				DedupPath:     config.DedupFile,
				SourceDir:     config.SourceDir,
				Size:          reader.OriginalSize(),
				readerFactory: r.readerFactory,
			}}
			reader.Close()
		}
	} else {
		// Fixed-size worker pool to bound goroutine count and open file concurrency.
		numWorkers := maxParallelReaders
		if len(configs) < numWorkers {
			numWorkers = len(configs)
		}
		jobs := make(chan int)
		var wg sync.WaitGroup
		wg.Add(numWorkers)
		for range numWorkers {
			go func() {
				defer wg.Done()
				for idx := range jobs {
					cfg := configs[idx]
					reader, err := r.readerFactory.NewReaderLazy(cfg.DedupFile, cfg.SourceDir)
					if err != nil {
						results[idx] = reloadResult{err: fmt.Errorf("open dedup file %s: %w", cfg.DedupFile, err)}
						continue
					}
					results[idx] = reloadResult{file: &MKVFile{
						Name:          cfg.Name,
						DedupPath:     cfg.DedupFile,
						SourceDir:     cfg.SourceDir,
						Size:          reader.OriginalSize(),
						readerFactory: r.readerFactory,
					}}
					reader.Close()
				}
			}()
		}
		for i := range configs {
			jobs <- i
		}
		close(jobs)
		wg.Wait()
	}
	// Collect results in config order; on duplicate names the later entry wins
	// (a warning is logged so the collision is visible).
	for i, res := range results {
		if res.err != nil {
			logFn("warning: skipping %s: %v", configs[i].Name, res.err)
			continue
		}
		if existing, ok := newFiles[res.file.Name]; ok {
			logFn("warning: duplicate name %q (dedup: %s replaced by %s)", res.file.Name, existing.DedupPath, res.file.DedupPath)
		}
		newFiles[res.file.Name] = res.file
	}
	// Snapshot old file names for change detection
	r.mu.RLock()
	oldFileNames := make(map[string]bool, len(r.files))
	for name := range r.files {
		oldFileNames[name] = true
	}
	r.mu.RUnlock()
	// Before merge: capture child inodes for files being removed. We need
	// these for NotifyDelete (sends IN_DELETE inotify event), and the child
	// inode won't be reachable after tree merge removes it. We do NOT
	// capture parent inodes here because the merge may delete parent
	// directories, leaving stale inode pointers that crash go-fuse.
	deletedChildren := make(map[string]*fs.Inode) // filePath → child inode
	for name := range oldFileNames {
		if _, inNew := newFiles[name]; !inNew {
			parentInode, basename := r.findParentInode(name)
			if parentInode != nil {
				if child := parentInode.GetChild(basename); child != nil {
					deletedChildren[name] = child
				}
			}
		}
	}
	// Build new directory tree
	fileList := make([]*MKVFile, 0, len(newFiles))
	for _, f := range newFiles {
		fileList = append(fileList, f)
	}
	newTree := BuildDirectoryTree(fileList, r.verbose, r.readerFactory, r.permStore)
	// Update flat files map in place (preserves pointer identity for cached inodes)
	r.mu.Lock()
	for name := range r.files {
		if _, inNew := newFiles[name]; !inNew {
			delete(r.files, name)
		}
	}
	for name, newFile := range newFiles {
		if existingFile, ok := r.files[name]; ok {
			// Existing file: update in place under its own lock so cached
			// inodes keep pointing at the same MKVFile object.
			existingFile.mu.Lock()
			existingFile.updateFrom(newFile)
			existingFile.mu.Unlock()
		} else {
			r.files[name] = newFile
		}
	}
	r.mu.Unlock()
	// Merge new tree into existing tree in place
	mergeDirectoryTree(r.rootDir, newTree)
	// After merge: capture all notifications using the post-merge tree.
	// Parent inodes are now resolved against the live tree, so we never
	// reference deleted directory inodes. If a parent directory was removed
	// by the merge, findParentInode returns nil and we skip the notification
	// — the directory removal already invalidates its children in the kernel.
	var notifications []reloadNotification
	changedDirs := make(map[*fs.Inode]bool)
	for name := range oldFileNames {
		if _, inNew := newFiles[name]; !inNew {
			parentInode, basename := r.findParentInode(name)
			if parentInode != nil {
				notifications = append(notifications, reloadNotification{
					parent:   parentInode,
					child:    deletedChildren[name],
					name:     basename,
					isDelete: true,
				})
				changedDirs[parentInode] = true
				markAncestorDirs(parentInode, changedDirs)
			}
		}
	}
	for name := range newFiles {
		if !oldFileNames[name] {
			parentInode, basename := r.findParentInode(name)
			if parentInode != nil {
				notifications = append(notifications, reloadNotification{
					parent:   parentInode,
					name:     basename,
					isDelete: false,
				})
				changedDirs[parentInode] = true
				markAncestorDirs(parentInode, changedDirs)
			}
		}
	}
	// Reload permissions and clean up stale entries. Cleanup is skipped when
	// Load fails so a temporarily unreadable permissions file is not clobbered.
	if r.permStore != nil {
		if err := r.permStore.Load(); err != nil {
			logFn("warning: failed to reload permissions: %v", err)
		} else {
			validFiles, validDirs := r.collectValidPaths()
			removed := r.permStore.CleanupStale(validFiles, validDirs)
			if removed > 0 {
				logFn("cleaned up %d stale permission entries", removed)
				if err := r.permStore.Save(); err != nil {
					logFn("warning: failed to save permissions after cleanup: %v", err)
				}
			}
		}
	}
	logFn("reload complete: %d files", len(newFiles))
	// Emit FUSE kernel notifications. Must be called after all filesystem
	// locks are released — go-fuse may call back into the FS during
	// notification processing, which would deadlock if locks were held.
	r.emitReloadNotifications(notifications, changedDirs, logFn)
	return nil
}
// Files returns a snapshot of the current file set. Used by SourceWatcher
// to build reverse mappings from source files to virtual files. The returned
// map is a defensive copy so callers cannot race with concurrent Reload()
// calls mutating the internal map.
func (r *MKVFSRoot) Files() map[string]*MKVFile {
	r.mu.RLock()
	defer r.mu.RUnlock()
	snapshot := make(map[string]*MKVFile, len(r.files))
	for name, file := range r.files {
		snapshot[name] = file
	}
	return snapshot
}
// SetMounted marks the filesystem as mounted, enabling FUSE kernel
// notifications during config reload. Must be called after fs.Mount()
// succeeds.
func (r *MKVFSRoot) SetMounted() {
	// Atomic store; read concurrently by emitReloadNotifications.
	r.mounted.Store(true)
}
// emitReloadNotifications sends FUSE kernel notifications for files that
// were added or removed during a config reload. It is a no-op before the
// filesystem is mounted or when there is nothing to report.
func (r *MKVFSRoot) emitReloadNotifications(notifications []reloadNotification, changedDirs map[*fs.Inode]bool, logFn func(string, ...interface{})) {
	if !r.mounted.Load() || len(notifications) == 0 {
		return
	}
	deleted, invalidated := 0, 0
	for _, note := range notifications {
		switch {
		case note.isDelete && note.child != nil:
			// NotifyDelete sends a real IN_DELETE inotify event.
			if note.parent.NotifyDelete(note.name, note.child) == 0 {
				deleted++
			}
		default:
			// NotifyEntry invalidates the kernel's dentry cache. Used for
			// additions, and for deletions whose child inode was never
			// cached by the kernel.
			if note.parent.NotifyEntry(note.name) == 0 {
				invalidated++
			}
		}
	}
	// Invalidate the readdir cache of every directory that changed. Skip
	// uninitialized inodes (Ino==0) as a safety net — these should not
	// appear here after the findParentInode fix, but guard anyway.
	for dir := range changedDirs {
		if dir.StableAttr().Ino != 0 {
			dir.NotifyContent(0, 0)
		}
	}
	if deleted+invalidated > 0 {
		logFn("kernel notifications: %d deleted, %d invalidated, %d dirs", deleted, invalidated, len(changedDirs))
	}
}
// collectValidPaths returns maps of all valid file and directory paths
// currently present in the mounted tree (empty maps when there is no tree).
func (r *MKVFSRoot) collectValidPaths() (files, dirs map[string]bool) {
	files, dirs = map[string]bool{}, map[string]bool{}
	if root := r.rootDir; root != nil {
		r.collectPathsRecursive(root, files, dirs)
	}
	return files, dirs
}
// collectPathsRecursive walks the directory tree under node, recording every
// directory path (including the root's empty path) in dirs and every file's
// slash-joined path in files.
func (r *MKVFSRoot) collectPathsRecursive(node *MKVFSDirNode, files, dirs map[string]bool) {
	node.mu.RLock()
	defer node.mu.RUnlock()
	dirs[node.path] = true
	// Precompute the path prefix once; the root has an empty path.
	prefix := ""
	if node.path != "" {
		prefix = node.path + "/"
	}
	for name := range node.files {
		files[prefix+name] = true
	}
	for _, child := range node.subdirs {
		r.collectPathsRecursive(child, files, dirs)
	}
}
// Getattr implements fs.NodeGetattrer - returns attributes for the root
// directory, using permissions from the permission store so the root is
// consistent with all subdirectories.
func (r *MKVFSRoot) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno {
	uid, gid, mode := getDirPerms(r.permStore, "")
	ts := uint64(time.Now().Unix())
	out.Mode = fuse.S_IFDIR | mode
	out.Uid = uid
	out.Gid = gid
	out.Atime, out.Mtime, out.Ctime = ts, ts, ts
	// Link count: 2 for "." and "..", plus one per subdirectory.
	out.Nlink = 2
	if root := r.rootDir; root != nil {
		root.mu.RLock()
		out.Nlink += uint32(len(root.subdirs))
		root.mu.RUnlock()
	}
	return 0
}
// Readdir implements fs.NodeReaddirer - lists files in the root directory,
// delegating to the directory tree for hierarchical listing.
func (r *MKVFSRoot) Readdir(ctx context.Context) (fs.DirStream, syscall.Errno) {
	// Permission checks are handled by the kernel via default_permissions mount option.
	// This properly checks supplementary groups and matches real filesystem behavior.
	if r.rootDir != nil {
		return r.rootDir.readdirInternal(ctx)
	}
	// Flat fallback when no directory tree exists (shouldn't happen).
	r.mu.RLock()
	defer r.mu.RUnlock()
	if r.verbose {
		log.Printf("Readdir: listing %d files (flat)", len(r.files))
	}
	entries := make([]fuse.DirEntry, 0, len(r.files))
	for name := range r.files {
		if r.verbose {
			log.Printf("Readdir: adding %s", name)
		}
		entries = append(entries, fuse.DirEntry{Name: name, Mode: fuse.S_IFREG})
	}
	return fs.NewListDirStream(entries), 0
}
// Lookup implements fs.NodeLookuper - looks up a file or directory by name.
// Uses the directory tree for hierarchical lookup.
func (r *MKVFSRoot) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) {
	// Permission checks are handled by the kernel via default_permissions mount option.
	if r.rootDir != nil {
		// The root read lock is held (via defer) for the whole tree-backed
		// branch, including inode construction below.
		r.rootDir.mu.RLock()
		defer r.rootDir.mu.RUnlock()
		// Check subdirectories first
		if subdir, ok := r.rootDir.subdirs[name]; ok {
			if r.verbose {
				log.Printf("Lookup: found subdir %s at root", name)
			}
			// Lock subdir to safely access its fields
			subdir.mu.RLock()
			subdirCount := len(subdir.subdirs)
			subdir.mu.RUnlock()
			uid, gid, mode := getDirPerms(r.permStore, subdir.path)
			now := time.Now()
			out.Mode = fuse.S_IFDIR | mode
			out.Uid = uid
			out.Gid = gid
			out.Atime = uint64(now.Unix())
			out.Mtime = uint64(now.Unix())
			out.Ctime = uint64(now.Unix())
			// 2 for "." and "..", plus one per subdirectory.
			out.Nlink = 2 + uint32(subdirCount)
			// Stable inode number derived from the directory path so the
			// same directory always gets the same Ino across lookups.
			stable := fs.StableAttr{
				Mode: fuse.S_IFDIR,
				Ino:  hashString(subdir.path),
			}
			// Directories use a persistent inode (cached by go-fuse).
			child := r.NewPersistentInode(ctx, subdir, stable)
			return child, 0
		}
		// Check files
		if file, ok := r.rootDir.files[name]; ok {
			if r.verbose {
				log.Printf("Lookup: found file %s at root (size=%d)", name, file.Size)
			}
			uid, gid, mode := getFilePerms(r.permStore, name)
			now := time.Now()
			out.Size = uint64(file.Size)
			out.Mode = fuse.S_IFREG | mode
			out.Uid = uid
			out.Gid = gid
			out.Atime = uint64(now.Unix())
			out.Mtime = uint64(now.Unix())
			out.Ctime = uint64(now.Unix())
			out.Nlink = 1
			node := &MKVFSNode{file: file, path: name, verbose: r.verbose, permStore: r.permStore}
			stable := fs.StableAttr{
				Mode: fuse.S_IFREG,
				Ino:  hashString(name),
			}
			// Files use a regular (non-persistent) inode.
			child := r.NewInode(ctx, node, stable)
			return child, 0
		}
		if r.verbose {
			log.Printf("Lookup: not found %s at root", name)
		}
		return nil, syscall.ENOENT
	}
	// Fallback to flat lookup if no directory tree (shouldn't happen)
	r.mu.RLock()
	file, ok := r.files[name]
	r.mu.RUnlock()
	if !ok {
		if r.verbose {
			log.Printf("Lookup: file not found: %s", name)
		}
		return nil, syscall.ENOENT
	}
	if r.verbose {
		log.Printf("Lookup: %s (size=%d)", name, file.Size)
	}
	uid, gid, mode := getFilePerms(r.permStore, name)
	// Create a new file node
	node := &MKVFSNode{file: file, path: name, verbose: r.verbose, permStore: r.permStore}
	// Set attributes
	now := time.Now()
	out.Size = uint64(file.Size)
	out.Mode = fuse.S_IFREG | mode
	out.Uid = uid
	out.Gid = gid
	out.Atime = uint64(now.Unix())
	out.Mtime = uint64(now.Unix())
	out.Ctime = uint64(now.Unix())
	// Create inode with stable ID based on filename
	stable := fs.StableAttr{
		Mode: fuse.S_IFREG,
		Ino:  hashString(name),
	}
	child := r.NewInode(ctx, node, stable)
	return child, 0
}
package fuse
import (
"context"
"fmt"
"os/exec"
"strings"
"sync"
"time"
"al.essio.dev/pkg/shellescape"
"github.com/stuckj/mkvdup/internal/dedup"
)
// ErrorEvent describes a source integrity issue detected by the watcher.
type ErrorEvent struct {
	SourcePath    string   // absolute path of the changed source file
	AffectedFiles []string // virtual file names affected
	// Event is one of: "changed", "missing", "size_changed",
	// "checksum_mismatch", "read_error", "checksum_queue_full".
	Event string
}
// ErrorNotifier batches integrity error events and executes an external
// command with placeholder substitution. Events are collected for a
// configurable batch interval; when the interval expires, the command
// is executed once with all accumulated events.
type ErrorNotifier struct {
	config dedup.ErrorCommandConfig
	logFn  func(string, ...interface{})
	mu      sync.Mutex   // guards pending, timer, and stopped
	pending []ErrorEvent // events accumulated since the last flush
	timer   *time.Timer  // debounce timer; nil when no batch is pending
	stopped bool         // set by Stop; suppresses further notifications
}
// NewErrorNotifier creates a notifier from the given config. A nil logFn is
// replaced with a no-op logger so callers may pass nil.
func NewErrorNotifier(config dedup.ErrorCommandConfig, logFn func(string, ...interface{})) *ErrorNotifier {
	n := &ErrorNotifier{config: config, logFn: logFn}
	if n.logFn == nil {
		n.logFn = func(string, ...interface{}) {}
	}
	return n
}
// Notify adds an error event to the batch. If this is the first event in
// the batch, a timer is started. Subsequent events reset the timer so that
// rapid bursts are coalesced into a single command execution.
func (n *ErrorNotifier) Notify(event ErrorEvent) {
	n.mu.Lock()
	defer n.mu.Unlock()
	if n.stopped {
		return
	}
	n.pending = append(n.pending, event)
	// Debounce: reset a live timer, otherwise start one.
	if n.timer != nil {
		n.timer.Reset(n.config.BatchInterval)
		return
	}
	n.timer = time.AfterFunc(n.config.BatchInterval, n.flush)
}
// Stop flushes any pending events and prevents future notifications.
func (n *ErrorNotifier) Stop() {
	n.mu.Lock()
	n.stopped = true
	if t := n.timer; t != nil {
		t.Stop()
		n.timer = nil
	}
	batch := n.pending
	n.pending = nil
	n.mu.Unlock()
	// Execute outside the lock so a slow command cannot block Notify callers.
	if len(batch) == 0 {
		return
	}
	n.executeCommand(batch)
}
// flush is called when the debounce timer fires; it drains the pending batch
// and runs the configured command once for all accumulated events.
func (n *ErrorNotifier) flush() {
	n.mu.Lock()
	if n.stopped {
		n.mu.Unlock()
		return
	}
	batch := n.pending
	n.pending, n.timer = nil, nil
	n.mu.Unlock()
	if len(batch) == 0 {
		return
	}
	n.executeCommand(batch)
}
// executeCommand runs the configured external command with placeholders
// substituted from the batched events. The command runs under a timeout
// and its combined output is logged on failure.
func (n *ErrorNotifier) executeCommand(events []ErrorEvent) {
	argv := n.config.Command.Args
	if len(argv) == 0 {
		n.logFn("source-watch: on_error_command: no command configured, skipping")
		return
	}
	ctx, cancel := context.WithTimeout(context.Background(), n.config.Timeout)
	defer cancel()
	var cmd *exec.Cmd
	if n.config.Command.IsShell {
		// String form: run via sh -c with shell-escaped placeholder values.
		cmd = exec.CommandContext(ctx, "sh", "-c", substitutePlaceholders(argv[0], events, true))
	} else {
		// List form: substitute placeholders per argument (no escaping needed).
		expanded := make([]string, len(argv))
		for i, arg := range argv {
			expanded[i] = substitutePlaceholders(arg, events, false)
		}
		cmd = exec.CommandContext(ctx, expanded[0], expanded[1:]...)
	}
	output, err := cmd.CombinedOutput()
	if err != nil {
		n.logFn("source-watch: on_error_command failed: %v (output: %s)", err, strings.TrimSpace(string(output)))
	}
}
// substitutePlaceholders replaces %source%, %files%, and %event% in s
// with values derived from the batched events. When shellEscape is true,
// placeholder values are shell-escaped for safe use in sh -c commands.
func substitutePlaceholders(s string, events []ErrorEvent, shellEscape bool) string {
	// %source%: newline-separated source paths, deduplicated, first-seen order.
	seenSources := map[string]bool{}
	var sources []string
	for _, ev := range events {
		if seenSources[ev.SourcePath] {
			continue
		}
		seenSources[ev.SourcePath] = true
		sources = append(sources, ev.SourcePath)
	}
	// %files%: comma-separated affected virtual files, deduplicated.
	seenFiles := map[string]bool{}
	var files []string
	for _, ev := range events {
		for _, name := range ev.AffectedFiles {
			if seenFiles[name] {
				continue
			}
			seenFiles[name] = true
			files = append(files, name)
		}
	}
	// %event%: a bare event name for a single event, otherwise one
	// "path: event" line per event.
	var eventLines []string
	if len(events) == 1 {
		eventLines = []string{events[0].Event}
	} else {
		for _, ev := range events {
			eventLines = append(eventLines, fmt.Sprintf("%s: %s", ev.SourcePath, ev.Event))
		}
	}
	sourceVal := strings.Join(sources, "\n")
	filesVal := strings.Join(files, ", ")
	eventVal := strings.Join(eventLines, "\n")
	if shellEscape {
		sourceVal = shellescape.Quote(sourceVal)
		filesVal = shellescape.Quote(filesVal)
		eventVal = shellescape.Quote(eventVal)
	}
	// Replacements are applied sequentially; each pass rescans the result.
	s = strings.ReplaceAll(s, "%source%", sourceVal)
	s = strings.ReplaceAll(s, "%files%", filesVal)
	s = strings.ReplaceAll(s, "%event%", eventVal)
	return s
}
// Package fuse provides a FUSE filesystem for accessing deduplicated MKV files.
package fuse
import (
"context"
"fmt"
"log"
"os"
"os/user"
"path/filepath"
"strconv"
"sync"
"syscall"
"github.com/hanwen/go-fuse/v2/fuse"
"gopkg.in/yaml.v3"
)
// Perms holds uid, gid, and mode for a file or directory.
// Nil values indicate the field should inherit from defaults.
type Perms struct {
	UID  *uint32 `yaml:"uid,omitempty"`  // owner uid override, nil = default
	GID  *uint32 `yaml:"gid,omitempty"`  // group gid override, nil = default
	Mode *uint32 `yaml:"mode,omitempty"` // mode bits override, nil = default
}
// Defaults holds default permissions for files and directories.
// When loaded from the permissions file, zero-valued fields are treated
// as "unset" and keep the built-in defaults (see Load).
type Defaults struct {
	FileUID  uint32 `yaml:"file_uid"`
	FileGID  uint32 `yaml:"file_gid"`
	FileMode uint32 `yaml:"file_mode"`
	DirUID   uint32 `yaml:"dir_uid"`
	DirGID   uint32 `yaml:"dir_gid"`
	DirMode  uint32 `yaml:"dir_mode"`
}
// DefaultPerms returns the default permission values: root-owned,
// world-readable files (0444) and traversable directories (0555).
func DefaultPerms() Defaults {
	var d Defaults
	// UID/GID fields stay 0 (root) via the zero value.
	d.FileMode = 0444
	d.DirMode = 0555
	return d
}
// permissionsFile is the structure of the permissions YAML file
// (the on-disk schema read by Load and written by Save).
type permissionsFile struct {
	Defaults    Defaults          `yaml:"defaults"`
	Files       map[string]*Perms `yaml:"files,omitempty"`       // keyed by virtual file path
	Directories map[string]*Perms `yaml:"directories,omitempty"` // keyed by virtual directory path
}
// PermissionStore manages file/directory permissions with persistence.
type PermissionStore struct {
	path     string // YAML file path; "" disables persistence (Load/Save become no-ops)
	defaults Defaults
	files    map[string]*Perms // per-file overrides, keyed by virtual path
	dirs     map[string]*Perms // per-directory overrides, keyed by virtual path
	mu       sync.RWMutex      // guards defaults, files, and dirs
	verbose  bool
}
// NewPermissionStore creates a new permission store.
// If path is empty, permissions will not be persisted.
func NewPermissionStore(path string, defaults Defaults, verbose bool) *PermissionStore {
	store := &PermissionStore{
		path:     path,
		defaults: defaults,
		verbose:  verbose,
	}
	store.files = map[string]*Perms{}
	store.dirs = map[string]*Perms{}
	return store
}
// Load loads permissions from the file.
// If the file doesn't exist, the store remains empty (using defaults).
//
// Zero-valued fields in the file's defaults section are treated as "unset"
// and keep the built-in defaults — an explicit uid/gid 0 or mode 0 therefore
// cannot be expressed in the file.
func (s *PermissionStore) Load() error {
	if s.path == "" {
		// Persistence disabled; nothing to load.
		return nil
	}
	data, err := os.ReadFile(s.path)
	if err != nil {
		if os.IsNotExist(err) {
			if s.verbose {
				log.Printf("Permissions file %s does not exist, using defaults", s.path)
			}
			return nil
		}
		return fmt.Errorf("read permissions file: %w", err)
	}
	var pf permissionsFile
	if err := yaml.Unmarshal(data, &pf); err != nil {
		return fmt.Errorf("parse permissions file: %w", err)
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	// Override defaults field-by-field; a zero value means "not set in the
	// file". (No outer "any field set" guard is needed — each per-field
	// check already covers it.)
	if pf.Defaults.FileMode != 0 {
		s.defaults.FileMode = pf.Defaults.FileMode
	}
	if pf.Defaults.FileUID != 0 {
		s.defaults.FileUID = pf.Defaults.FileUID
	}
	if pf.Defaults.FileGID != 0 {
		s.defaults.FileGID = pf.Defaults.FileGID
	}
	if pf.Defaults.DirMode != 0 {
		s.defaults.DirMode = pf.Defaults.DirMode
	}
	if pf.Defaults.DirUID != 0 {
		s.defaults.DirUID = pf.Defaults.DirUID
	}
	if pf.Defaults.DirGID != 0 {
		s.defaults.DirGID = pf.Defaults.DirGID
	}
	// Replace override maps only when present in the file, so a file with
	// no entries doesn't wipe in-memory state.
	if pf.Files != nil {
		s.files = pf.Files
	}
	if pf.Directories != nil {
		s.dirs = pf.Directories
	}
	if s.verbose {
		log.Printf("Loaded permissions: %d files, %d directories", len(s.files), len(s.dirs))
	}
	return nil
}
// Save saves permissions to the file.
//
// The file is written atomically (temp file + rename in the same directory)
// so a crash mid-write cannot leave a truncated or corrupt permissions file.
func (s *PermissionStore) Save() error {
	if s.path == "" {
		// Persistence disabled; nothing to save.
		return nil
	}
	s.mu.RLock()
	// Deep copy the maps to avoid data races during marshalling.
	// We copy both the map and the Perms values to ensure complete isolation.
	pf := permissionsFile{
		Defaults: s.defaults,
	}
	if s.files != nil {
		pf.Files = make(map[string]*Perms, len(s.files))
		for k, v := range s.files {
			if v != nil {
				permsCopy := *v // copy the Perms struct
				pf.Files[k] = &permsCopy
			}
		}
	}
	if s.dirs != nil {
		pf.Directories = make(map[string]*Perms, len(s.dirs))
		for k, v := range s.dirs {
			if v != nil {
				permsCopy := *v // copy the Perms struct
				pf.Directories[k] = &permsCopy
			}
		}
	}
	s.mu.RUnlock()
	// Create parent directory if needed
	dir := filepath.Dir(s.path)
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("create permissions directory: %w", err)
	}
	data, err := yaml.Marshal(&pf)
	if err != nil {
		return fmt.Errorf("marshal permissions: %w", err)
	}
	// Atomic write: stage into a temp file in the same directory, then
	// rename over the target. Rename within one filesystem is atomic, so
	// readers always see either the old or the new complete file.
	tmp, err := os.CreateTemp(dir, ".permissions-*.yaml")
	if err != nil {
		return fmt.Errorf("create temp permissions file: %w", err)
	}
	tmpName := tmp.Name()
	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		os.Remove(tmpName)
		return fmt.Errorf("write permissions file: %w", err)
	}
	if err := tmp.Close(); err != nil {
		os.Remove(tmpName)
		return fmt.Errorf("write permissions file: %w", err)
	}
	// CreateTemp creates 0600 files; restore the 0644 mode used previously.
	if err := os.Chmod(tmpName, 0644); err != nil {
		os.Remove(tmpName)
		return fmt.Errorf("write permissions file: %w", err)
	}
	if err := os.Rename(tmpName, s.path); err != nil {
		os.Remove(tmpName)
		return fmt.Errorf("write permissions file: %w", err)
	}
	if s.verbose {
		log.Printf("Saved permissions to %s", s.path)
	}
	return nil
}
// GetFilePerms returns the effective permissions for a file.
// Returns uid, gid, mode with defaults applied for any unset values.
func (s *PermissionStore) GetFilePerms(path string) (uid, gid, mode uint32) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	// Start from the defaults, then layer any per-file overrides on top.
	uid, gid, mode = s.defaults.FileUID, s.defaults.FileGID, s.defaults.FileMode
	override, ok := s.files[path]
	if !ok {
		return uid, gid, mode
	}
	if override.UID != nil {
		uid = *override.UID
	}
	if override.GID != nil {
		gid = *override.GID
	}
	if override.Mode != nil {
		mode = *override.Mode
	}
	return uid, gid, mode
}
// GetDirPerms returns the effective permissions for a directory.
// Returns uid, gid, mode with defaults applied for any unset values.
func (s *PermissionStore) GetDirPerms(path string) (uid, gid, mode uint32) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	// Start from the defaults, then layer any per-directory overrides on top.
	uid, gid, mode = s.defaults.DirUID, s.defaults.DirGID, s.defaults.DirMode
	override, ok := s.dirs[path]
	if !ok {
		return uid, gid, mode
	}
	if override.UID != nil {
		uid = *override.UID
	}
	if override.GID != nil {
		gid = *override.GID
	}
	if override.Mode != nil {
		mode = *override.Mode
	}
	return uid, gid, mode
}
// SetFilePerms sets permissions for a file.
// Only non-nil values are updated; nil values leave existing values unchanged.
// Automatically saves to disk.
func (s *PermissionStore) SetFilePerms(path string, uid, gid *uint32, mode *uint32) error {
	s.mu.Lock()
	if uid == nil && gid == nil && mode == nil {
		// Nothing requested; avoid a pointless save.
		s.mu.Unlock()
		return nil
	}
	entry, ok := s.files[path]
	if !ok {
		entry = &Perms{}
		s.files[path] = entry
	}
	// Copy each requested value so the store owns its lifetime.
	assign := func(src *uint32, dst **uint32) {
		if src != nil {
			v := *src
			*dst = &v
		}
	}
	assign(uid, &entry.UID)
	assign(gid, &entry.GID)
	assign(mode, &entry.Mode)
	s.mu.Unlock()
	if s.verbose {
		log.Printf("SetFilePerms: %s uid=%v gid=%v mode=%v", path, uid, gid, mode)
	}
	// Save takes its own locks, so it must run after the unlock above.
	return s.Save()
}
// RemoveFilePerms removes all permission overrides for a file.
// The file will use default permissions. Automatically saves to disk.
func (s *PermissionStore) RemoveFilePerms(path string) error {
	s.mu.Lock()
	delete(s.files, path)
	s.mu.Unlock()
	if s.verbose {
		log.Printf("RemoveFilePerms: %s", path)
	}
	// Save takes its own locks, so it must run after the unlock above.
	return s.Save()
}
// SetDirPerms sets permissions for a directory.
// Only non-nil values are updated; nil values leave existing values unchanged.
// Automatically saves to disk.
func (s *PermissionStore) SetDirPerms(path string, uid, gid *uint32, mode *uint32) error {
	s.mu.Lock()
	if uid == nil && gid == nil && mode == nil {
		// Nothing requested; avoid a pointless save.
		s.mu.Unlock()
		return nil
	}
	entry, ok := s.dirs[path]
	if !ok {
		entry = &Perms{}
		s.dirs[path] = entry
	}
	// Copy each requested value so the store owns its lifetime.
	assign := func(src *uint32, dst **uint32) {
		if src != nil {
			v := *src
			*dst = &v
		}
	}
	assign(uid, &entry.UID)
	assign(gid, &entry.GID)
	assign(mode, &entry.Mode)
	s.mu.Unlock()
	if s.verbose {
		log.Printf("SetDirPerms: %s uid=%v gid=%v mode=%v", path, uid, gid, mode)
	}
	// Save takes its own locks, so it must run after the unlock above.
	return s.Save()
}
// RemoveDirPerms removes all permission overrides for a directory.
// The directory will use default permissions. Automatically saves to disk.
func (s *PermissionStore) RemoveDirPerms(path string) error {
	s.mu.Lock()
	delete(s.dirs, path)
	s.mu.Unlock()
	if s.verbose {
		log.Printf("RemoveDirPerms: %s", path)
	}
	// Save takes its own locks, so it must run after the unlock above.
	return s.Save()
}
// CleanupStale removes entries for paths that don't exist in the mounted
// filesystem. validFiles and validDirs are sets of valid paths (values are
// ignored; only key membership matters). Returns the number of stale
// entries removed. Deleting while ranging is safe for Go maps.
func (s *PermissionStore) CleanupStale(validFiles, validDirs map[string]bool) int {
	s.mu.Lock()
	defer s.mu.Unlock()
	removed := 0
	for path := range s.files {
		if validFiles[path] {
			continue
		}
		delete(s.files, path)
		removed++
		if s.verbose {
			log.Printf("Removed stale file permission entry: %s", path)
		}
	}
	for path := range s.dirs {
		if validDirs[path] {
			continue
		}
		delete(s.dirs, path)
		removed++
		if s.verbose {
			log.Printf("Removed stale directory permission entry: %s", path)
		}
	}
	return removed
}
// Defaults returns a copy of the current default permissions, read under lock.
func (s *PermissionStore) Defaults() Defaults {
	s.mu.RLock()
	d := s.defaults
	s.mu.RUnlock()
	return d
}
// ResolvePermissionsPath determines which permissions file to use.
// Priority:
//  1. explicitPath (from --permissions-file flag)
//  2. ~/.config/mkvdup/permissions.yaml (if exists) - for both root and non-root
//  3. Default based on euid: root uses /etc/, non-root uses ~/.config/
//
// Non-root users always get a user-writable path (unless explicitly overridden)
// to avoid EACCES errors when saving permission changes.
func ResolvePermissionsPath(explicitPath string) string {
	if explicitPath != "" {
		return explicitPath
	}
	userPath := ""
	if home, err := os.UserHomeDir(); err == nil {
		userPath = filepath.Join(home, ".config", "mkvdup", "permissions.yaml")
	}
	// An existing user config takes priority for both root and non-root.
	if userPath != "" {
		if _, err := os.Stat(userPath); err == nil {
			return userPath
		}
	}
	const systemPath = "/etc/mkvdup/permissions.yaml"
	// Root defaults to the system path whether or not it exists yet.
	// (No stat needed — the result is the same either way.)
	if os.Geteuid() == 0 {
		return systemPath
	}
	// Non-root: always use the user path to ensure writability. Do NOT use
	// the system path even if it exists, as non-root users typically cannot
	// write to /etc/ and chmod/chown operations would fail with EACCES.
	if userPath != "" {
		return userPath
	}
	// Fallback if no home directory (unusual for non-root).
	return systemPath
}
// CallerInfo represents the calling process's credentials.
type CallerInfo struct {
	Uid uint32 // caller's user id (from the FUSE request context)
	Gid uint32 // caller's primary group id (from the FUSE request context)
}
// testCallerHook is set by test code to allow injecting caller credentials.
// This is nil in production, ensuring only real FUSE contexts are trusted.
// GetCaller consults it only after fuse.FromContext has failed.
var testCallerHook func(context.Context) (CallerInfo, bool)
// GetCaller extracts caller credentials from the FUSE context.
// Returns (caller, true) if credentials are available, (zero, false)
// otherwise. Callers should deny access when ok is false to fail closed.
func GetCaller(ctx context.Context) (CallerInfo, bool) {
	if c, ok := fuse.FromContext(ctx); ok {
		return CallerInfo{Uid: c.Uid, Gid: c.Gid}, true
	}
	// Fall back to the test-injected hook (nil outside tests).
	if hook := testCallerHook; hook != nil {
		if c, ok := hook(ctx); ok {
			return c, true
		}
	}
	// Fail closed: no credentials available.
	var none CallerInfo
	return none, false
}
// IsRoot returns true if the caller is root (uid 0).
func (c CallerInfo) IsRoot() bool {
	return c.Uid == 0
}
// CheckChown verifies the caller can change file ownership.
// Returns 0 if allowed, syscall.EPERM if denied.
//
// Rules:
//   - Only root can change the UID to a different user.
//   - No-op changes (nil, or equal to the current value) are always allowed.
//   - Root can change the GID to anything; a non-root caller must be the
//     file's owner AND a member (primary or supplementary) of the target group.
func CheckChown(caller CallerInfo, fileUID, fileGID uint32, newUID, newGID *uint32) syscall.Errno {
	uidChanging := newUID != nil && *newUID != fileUID
	if uidChanging && !caller.IsRoot() {
		return syscall.EPERM
	}
	gidChanging := newGID != nil && *newGID != fileGID
	if !gidChanging || caller.IsRoot() {
		return 0
	}
	// Non-root GID change: must be the owner and belong to the target group.
	if caller.Uid != fileUID || !isGroupMember(caller.Uid, caller.Gid, *newGID) {
		return syscall.EPERM
	}
	return 0
}
// groupMembershipFunc is the function used to check group membership.
// It can be overridden in tests to avoid OS-level lookups; the default
// implementation is defaultGroupMembership.
var groupMembershipFunc = defaultGroupMembership
// isGroupMember checks if a user is a member of the given group.
// This checks the primary GID and supplementary groups. It is a thin
// indirection through groupMembershipFunc so tests can stub it out.
func isGroupMember(uid, primaryGID, targetGID uint32) bool {
	return groupMembershipFunc(uid, primaryGID, targetGID)
}
// defaultGroupMembership checks group membership by looking up the user's
// groups from the OS. The primary GID short-circuits without an OS lookup;
// any lookup failure is treated as "not a member".
func defaultGroupMembership(uid, primaryGID, targetGID uint32) bool {
	if targetGID == primaryGID {
		// Primary GID is always a member.
		return true
	}
	u, err := user.LookupId(strconv.FormatUint(uint64(uid), 10))
	if err != nil {
		return false
	}
	groupIDs, err := u.GroupIds()
	if err != nil {
		return false
	}
	want := strconv.FormatUint(uint64(targetGID), 10)
	for _, gid := range groupIDs {
		if gid == want {
			return true
		}
	}
	return false
}
// CheckChmod verifies the caller can change file mode.
// Returns 0 if allowed, syscall.EPERM if denied.
// Only root or the file owner can chmod.
func CheckChmod(caller CallerInfo, fileUID uint32) syscall.Errno {
	if !caller.IsRoot() && caller.Uid != fileUID {
		return syscall.EPERM
	}
	return 0
}
package fuse
import (
"log"
"path"
"strings"
)
// BuildDirectoryTree creates a directory tree from files with path-containing names.
// Directories are auto-created for each path component.
// Files with names like "Movies/Action/film.mkv" will create the directory hierarchy.
//
// Path handling:
// - Leading slashes are stripped (absolute paths become relative)
// - Paths are cleaned (e.g., "foo//bar" becomes "foo/bar")
// - Only forward slashes (/) are treated as path separators
// - Paths containing ".." components are rejected
// - Empty filenames are rejected
//
// Conflicts:
// - Duplicate paths: later file wins, warning logged
// - File/directory collision: directory wins, file skipped with warning
func BuildDirectoryTree(files []*MKVFile, verbose bool, readerFactory ReaderFactory, permStore *PermissionStore) *MKVFSDirNode {
	// The root has empty name/path (zero values); intermediate directory
	// nodes are created on demand by insertFile while walking each path.
	root := &MKVFSDirNode{
		files:         map[string]*MKVFile{},
		subdirs:       map[string]*MKVFSDirNode{},
		verbose:       verbose,
		readerFactory: readerFactory,
		permStore:     permStore,
	}
	for _, f := range files {
		insertFile(root, f, verbose, readerFactory, permStore)
	}
	return root
}
// insertFile inserts a file into the directory tree, creating directories
// as needed.
//
// Security: any ".." path component causes the file to be rejected. The
// check is made on whole components of the raw name (before path.Clean,
// which would silently resolve "a/../b" to "b"), so traversal attempts are
// rejected outright while legitimate filenames that merely contain ".."
// (e.g. "Movie..2020.mkv") are accepted.
func insertFile(root *MKVFSDirNode, file *MKVFile, verbose bool, readerFactory ReaderFactory, permStore *PermissionStore) {
	for _, part := range strings.Split(file.Name, "/") {
		if part == ".." {
			log.Printf("Warning: skipping file with invalid path (contains '..'): %s", file.Name)
			return
		}
	}
	// Clean and split the path
	cleanPath := path.Clean(file.Name)
	parts := strings.Split(cleanPath, "/")
	// Filter out empty parts and "." (handles leading slashes, repeated
	// slashes, and relative prefixes).
	validParts := make([]string, 0, len(parts))
	for _, p := range parts {
		if p != "" && p != "." {
			validParts = append(validParts, p)
		}
	}
	// Validate: reject empty filenames
	if len(validParts) == 0 {
		log.Printf("Warning: skipping file with empty name: %q", file.Name)
		return
	}
	fileName := validParts[len(validParts)-1]
	if fileName == "" {
		log.Printf("Warning: skipping file with empty filename: %q", file.Name)
		return
	}
	// Navigate/create directories for each path component except the last (filename)
	current := root
	for i := 0; i < len(validParts)-1; i++ {
		dirName := validParts[i]
		current.mu.Lock()
		// File/directory collision: an existing file wins over a new directory.
		if _, fileExists := current.files[dirName]; fileExists {
			log.Printf("Warning: path component %q conflicts with existing file, skipping: %s", dirName, file.Name)
			current.mu.Unlock()
			return
		}
		subdir, exists := current.subdirs[dirName]
		if !exists {
			// Create new directory node
			var newPath string
			if current.path == "" {
				newPath = dirName
			} else {
				newPath = current.path + "/" + dirName
			}
			subdir = &MKVFSDirNode{
				name:          dirName,
				path:          newPath,
				files:         make(map[string]*MKVFile),
				subdirs:       make(map[string]*MKVFSDirNode),
				verbose:       verbose,
				readerFactory: readerFactory,
				permStore:     permStore,
			}
			current.subdirs[dirName] = subdir
		}
		current.mu.Unlock()
		current = subdir
	}
	// Insert the file into the final directory
	current.mu.Lock()
	defer current.mu.Unlock()
	// File/directory collision: an existing directory wins; the file is skipped.
	if _, dirExists := current.subdirs[fileName]; dirExists {
		log.Printf("Warning: file %q conflicts with existing directory, skipping", file.Name)
		return
	}
	// Duplicate path: later file wins, with a warning.
	if existing, exists := current.files[fileName]; exists {
		log.Printf("Warning: duplicate path %q, replacing %s with %s", file.Name, existing.DedupPath, file.DedupPath)
	}
	current.files[fileName] = file
}
// mergeDirectoryTree merges newTree's contents into existing by mutating
// existing's files and subdirs maps in place. go-fuse caches persistent inode
// objects by inode number, so swapping in a new root node would be invisible
// to already-cached inodes — updating the maps of the existing nodes is not.
func mergeDirectoryTree(existing, newTree *MKVFSDirNode) {
	existing.mu.Lock()
	defer existing.mu.Unlock()
	// Drop files that vanished from the new tree.
	for name := range existing.files {
		if _, stillPresent := newTree.files[name]; !stillPresent {
			delete(existing.files, name)
		}
	}
	// Add new files; refresh existing ones in place so cached inodes keep
	// pointing at the same *MKVFile.
	for name, incoming := range newTree.files {
		current, ok := existing.files[name]
		if !ok {
			existing.files[name] = incoming
			continue
		}
		current.mu.Lock()
		current.updateFrom(incoming)
		current.mu.Unlock()
	}
	// Drop subdirectories that vanished from the new tree.
	for name := range existing.subdirs {
		if _, stillPresent := newTree.subdirs[name]; !stillPresent {
			delete(existing.subdirs, name)
		}
	}
	// Adopt brand-new subdirectories; recursively merge pre-existing ones.
	for name, incoming := range newTree.subdirs {
		if current, ok := existing.subdirs[name]; ok {
			mergeDirectoryTree(current, incoming)
		} else {
			existing.subdirs[name] = incoming
		}
	}
}
package fuse
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/cespare/xxhash/v2"
"github.com/fsnotify/fsnotify"
"github.com/stuckj/mkvdup/internal/dedup"
)
// Default poll interval for network filesystems where inotify doesn't work.
const defaultPollInterval = 60 * time.Second

// checksumRequest is a queued checksum verification job, processed
// sequentially by checksumWorker.
type checksumRequest struct {
	absPath          string     // absolute path of the source file to re-hash
	expectedChecksum uint64     // xxhash value recorded for this source file
	expectedSize     int64      // file size recorded for this source file
	affected         []*MKVFile // virtual files backed by this source file
	gen              uint64     // generation stamp; stale requests are skipped
}
// SourceWatcher monitors source files for changes and takes action when
// modifications are detected. It uses inotify for local filesystems and
// falls back to polling for network filesystems (NFS, CIFS/SMB).
type SourceWatcher struct {
	// watcher is the underlying fsnotify watcher (inotify-backed).
	watcher *fsnotify.Watcher
	// reverse maps absolute source file paths to the virtual files that use them.
	reverse map[string][]*MKVFile
	// checksums maps absolute source file paths to expected xxhash values.
	checksums map[string]uint64
	// sizes maps absolute source file paths to expected file sizes.
	sizes map[string]int64
	// pollFiles maps absolute source file paths to their last known mtime
	// for directories that use polling instead of inotify.
	pollFiles map[string]time.Time
	action string // "warn", "disable", "checksum"
	// logFn receives diagnostic messages; never nil after NewSourceWatcher.
	logFn func(string, ...interface{})
	// mu guards the maps above plus checksumPending and updateGen.
	mu sync.RWMutex
	// checksumCh queues checksum verification requests so they run
	// sequentially in a single worker goroutine, avoiding I/O storms
	// when many source files change at once.
	checksumCh chan checksumRequest
	// checksumPending tracks source paths with a queued checksum request,
	// preventing duplicate queue entries for the same file. The worker
	// clears the flag when it starts processing, so new events that arrive
	// during verification are still queued.
	checksumPending map[string]bool
	// updateGen is incremented on each Update() call. Checksum requests
	// carry the generation they were created in; the worker skips requests
	// whose generation doesn't match, preventing stale verifications from
	// a previous config from disabling files after a reload.
	updateGen uint64
	pollInterval time.Duration // interval for network FS polling (0 = defaultPollInterval)
	notifier *ErrorNotifier // optional external command notifier
	// stopCh is closed by Stop() to signal all goroutines to exit.
	stopCh chan struct{}
	// wg tracks the event loop, checksum worker, and poll loop goroutines.
	wg sync.WaitGroup
}
// NewSourceWatcher creates a new source file watcher with the given action.
// A pollInterval <= 0 selects defaultPollInterval. When onErrorCommand is
// non-nil, an ErrorNotifier is created to execute the configured command on
// integrity problems. The watcher does nothing until Start() is called.
func NewSourceWatcher(action string, pollInterval time.Duration, onErrorCommand *dedup.ErrorCommandConfig, logFn func(string, ...interface{})) (*SourceWatcher, error) {
	fsw, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("create fsnotify watcher: %w", err)
	}
	if logFn == nil {
		logFn = func(string, ...interface{}) {} // no-op logger
	}
	if pollInterval <= 0 {
		pollInterval = defaultPollInterval
	}
	var notifier *ErrorNotifier
	if onErrorCommand != nil {
		notifier = NewErrorNotifier(*onErrorCommand, logFn)
	}
	sw := &SourceWatcher{
		watcher:         fsw,
		reverse:         make(map[string][]*MKVFile),
		checksums:       make(map[string]uint64),
		sizes:           make(map[string]int64),
		pollFiles:       make(map[string]time.Time),
		action:          action,
		logFn:           logFn,
		checksumCh:      make(chan checksumRequest, 256),
		checksumPending: make(map[string]bool),
		pollInterval:    pollInterval,
		notifier:        notifier,
		stopCh:          make(chan struct{}),
	}
	return sw, nil
}
// Update rebuilds the watcher's source file mappings from the current file set.
// It removes old watches and sets up new ones. Called on mount and after reload.
//
// For each MKVFile, the readerFactory is used to read the dedup file header
// (lazy read, no full initialization) to get the source file list.
//
// The method minimizes lock hold time: maps are built without the lock,
// swapped in briefly under the lock, and then inotify watches and os.Stat
// calls happen without the lock.
func (sw *SourceWatcher) Update(files map[string]*MKVFile, readerFactory ReaderFactory) {
	// Phase 1: Build new maps without holding the lock. This involves
	// I/O (reading dedup headers) that should not block event handling.
	newReverse := make(map[string][]*MKVFile)
	newChecksums := make(map[string]uint64)
	newSizes := make(map[string]int64)
	watchDirs := make(map[string]bool)
	for _, file := range files {
		reader, err := readerFactory.NewReaderLazy(file.DedupPath, file.SourceDir)
		if err != nil {
			sw.logFn("source-watch: warning: cannot read dedup header for %s: %v", file.Name, err)
			continue
		}
		sourceFiles := reader.SourceFileInfo()
		reader.Close()
		// Normalize the source dir to a trailing-separator form so the
		// HasPrefix containment check below can't be fooled by sibling
		// directories sharing a name prefix (e.g. /data vs /data2).
		cleanSourceDir := filepath.Clean(file.SourceDir)
		if cleanSourceDir[len(cleanSourceDir)-1] != filepath.Separator {
			cleanSourceDir += string(filepath.Separator)
		}
		for _, sf := range sourceFiles {
			absPath := filepath.Clean(filepath.Join(file.SourceDir, sf.RelativePath))
			if !strings.HasPrefix(absPath, cleanSourceDir) {
				sw.logFn("source-watch: warning: skipping source file with path traversal: %s", sf.RelativePath)
				continue
			}
			newReverse[absPath] = append(newReverse[absPath], file)
			newChecksums[absPath] = sf.Checksum
			newSizes[absPath] = sf.Size
			watchDirs[filepath.Dir(absPath)] = true
		}
	}
	// Phase 2: Swap maps and drain stale checksum queue under the lock.
	sw.mu.Lock()
	oldDirs := sw.watchedDirs()
	// Drain any stale checksum requests from a previous configuration.
drain:
	for {
		select {
		case <-sw.checksumCh:
		default:
			break drain
		}
	}
	sw.checksumPending = make(map[string]bool)
	// Bump the generation so any in-flight verification from the old config
	// is recognized as stale by checksumWorker/verifyChecksum.
	sw.updateGen++
	sw.reverse = newReverse
	sw.checksums = newChecksums
	sw.sizes = newSizes
	sw.pollFiles = make(map[string]time.Time)
	sw.mu.Unlock()
	// Phase 3: Update inotify watches without the lock.
	// fsnotify.Watcher methods are thread-safe.
	for dir := range oldDirs {
		sw.watcher.Remove(dir)
	}
	// Precompute files per directory so polling setup is O(files), not O(dirs×files).
	pathsByDir := make(map[string][]string)
	for absPath := range newReverse {
		dir := filepath.Dir(absPath)
		pathsByDir[dir] = append(pathsByDir[dir], absPath)
	}
	newPollFiles := make(map[string]time.Time)
	for dir := range watchDirs {
		if isNetworkFS(dir) {
			sw.logFn("source-watch: %s is on a network filesystem, using polling", dir)
			for _, absPath := range pathsByDir[dir] {
				if info, err := os.Stat(absPath); err == nil {
					newPollFiles[absPath] = info.ModTime()
				} else {
					// File currently missing/unavailable — use zero mtime so
					// pollCheck detects it appearing (or triggers handleChange
					// via its stat-error path).
					newPollFiles[absPath] = time.Time{}
				}
			}
		} else {
			if err := sw.watcher.Add(dir); err != nil {
				sw.logFn("source-watch: warning: cannot watch %s: %v", dir, err)
			}
		}
	}
	// Phase 4: Set poll files under the lock.
	if len(newPollFiles) > 0 {
		sw.mu.Lock()
		sw.pollFiles = newPollFiles
		sw.mu.Unlock()
	}
	sw.logFn("source-watch: monitoring %d source files in %d directories (action=%s)",
		len(newReverse), len(watchDirs), sw.action)
}
// watchedDirs returns the set of directories containing currently tracked
// source files. It reads sw.reverse, so callers must hold sw.mu (Update calls
// it under the write lock).
func (sw *SourceWatcher) watchedDirs() map[string]bool {
	out := make(map[string]bool, len(sw.reverse))
	for p := range sw.reverse {
		out[filepath.Dir(p)] = true
	}
	return out
}
// Start launches the watcher's background goroutines. Must be called after
// Update().
func (sw *SourceWatcher) Start() {
	launch := func(fn func()) {
		sw.wg.Add(1)
		go fn()
	}
	launch(sw.eventLoop)
	// Single checksum worker: only needed when the action verifies hashes;
	// one goroutine keeps hashing sequential.
	if sw.action == "checksum" {
		launch(sw.checksumWorker)
	}
	// Always run the poller — it no-ops while pollFiles is empty, but must
	// exist so network-FS directories added by a later Update() (after a
	// reload) are polled without a restart.
	launch(sw.pollLoop)
}
// Stop shuts the watcher down: it signals every goroutine, closes the
// underlying fsnotify watcher, and blocks until all goroutines exit. A
// configured notifier is stopped last, flushing any pending events.
func (sw *SourceWatcher) Stop() {
	close(sw.stopCh)
	sw.watcher.Close()
	sw.wg.Wait()
	if n := sw.notifier; n != nil {
		n.Stop()
	}
}
// notify forwards an error event to the external-command notifier, if one is
// configured; otherwise it does nothing.
func (sw *SourceWatcher) notify(sourcePath, event string, names []string) {
	n := sw.notifier
	if n == nil {
		return
	}
	n.Notify(ErrorEvent{
		SourcePath:    sourcePath,
		AffectedFiles: names,
		Event:         event,
	})
}
// eventLoop consumes fsnotify events and errors until the watcher channels
// close or Stop() is called.
func (sw *SourceWatcher) eventLoop() {
	defer sw.wg.Done()
	// Only writes, creates (overwrites), renames, and removals matter.
	const relevant = fsnotify.Write | fsnotify.Create | fsnotify.Rename | fsnotify.Remove
	for {
		select {
		case ev, open := <-sw.watcher.Events:
			if !open {
				return
			}
			if ev.Op&relevant != 0 {
				sw.handleChange(ev.Name)
			}
		case err, open := <-sw.watcher.Errors:
			if !open {
				return
			}
			sw.logFn("source-watch: watcher error: %v", err)
		case <-sw.stopCh:
			return
		}
	}
}
// pollLoop drives periodic change detection for files on network filesystems,
// where inotify is unreliable. It ticks at sw.pollInterval until stopped.
func (sw *SourceWatcher) pollLoop() {
	defer sw.wg.Done()
	tick := time.NewTicker(sw.pollInterval)
	defer tick.Stop()
	for {
		select {
		case <-sw.stopCh:
			return
		case <-tick.C:
			sw.pollCheck()
		}
	}
}
// pollCheck stats all poll-monitored files and triggers handleChange for
// any that have a different mtime than recorded. It snapshots the poll set
// under a read lock, performs os.Stat calls without the lock (network FS
// stats can block), then updates mtimes and processes changes.
func (sw *SourceWatcher) pollCheck() {
	// Snapshot under read lock so os.Stat doesn't block event handling.
	type polledFile struct {
		path      string
		lastMtime time.Time
	}
	sw.mu.RLock()
	snapshot := make([]polledFile, 0, len(sw.pollFiles))
	for absPath, lastMtime := range sw.pollFiles {
		snapshot = append(snapshot, polledFile{path: absPath, lastMtime: lastMtime})
	}
	sw.mu.RUnlock()
	// Stat without holding the lock.
	type mtimeUpdate struct {
		path     string
		newMtime time.Time
	}
	var (
		updates      []mtimeUpdate
		changedPaths []string
	)
	for _, pf := range snapshot {
		info, err := os.Stat(pf.path)
		if err != nil {
			// Stat failure (file missing/unreachable) counts as a change;
			// handleChange decides what to do about it.
			sw.logFn("source-watch: poll: cannot stat %s: %v", pf.path, err)
			changedPaths = append(changedPaths, pf.path)
			continue
		}
		if !info.ModTime().Equal(pf.lastMtime) {
			updates = append(updates, mtimeUpdate{path: pf.path, newMtime: info.ModTime()})
			changedPaths = append(changedPaths, pf.path)
		}
	}
	// Update stored mtimes under the lock.
	if len(updates) > 0 {
		sw.mu.Lock()
		for _, u := range updates {
			// Only record the new mtime if the path is still tracked —
			// an Update() may have replaced pollFiles in the meantime.
			if _, ok := sw.pollFiles[u.path]; ok {
				sw.pollFiles[u.path] = u.newMtime
			}
		}
		sw.mu.Unlock()
	}
	// Process changes — handleChange acquires the lock per-path.
	for _, absPath := range changedPaths {
		sw.handleChange(absPath)
	}
}
// handleChange processes a source file change event. It takes the write lock
// (handleChangeLocked may mutate checksumPending) and delegates.
func (sw *SourceWatcher) handleChange(absPath string) {
	sw.mu.Lock()
	defer sw.mu.Unlock()
	sw.handleChangeLocked(absPath)
}
// handleChangeLocked processes a source file change according to sw.action:
// "warn" logs and notifies; "disable" disables the affected virtual files;
// "checksum" disables on missing file or size change, otherwise queues a
// background checksum verification. Caller must hold sw.mu (write lock).
func (sw *SourceWatcher) handleChangeLocked(absPath string) {
	affected, ok := sw.reverse[absPath]
	if !ok {
		return // Not a tracked source file
	}
	names := make([]string, len(affected))
	for i, f := range affected {
		names[i] = f.Name
	}
	switch sw.action {
	case "warn":
		sw.logFn("source-watch: WARNING: source file changed: %s (affects: %v)", absPath, names)
		sw.notify(absPath, "changed", names)
	case "disable":
		sw.logFn("source-watch: source file changed, disabling: %s (affects: %v)", absPath, names)
		for _, f := range affected {
			f.Disable()
		}
		sw.notify(absPath, "changed", names)
	case "checksum":
		// Stat the source file to distinguish size changes from
		// timestamp-only changes (e.g. touch).
		info, err := os.Stat(absPath)
		if err != nil {
			// File disappeared — disable immediately
			sw.logFn("source-watch: source file missing, disabling: %s (affects: %v)", absPath, names)
			for _, f := range affected {
				f.Disable()
			}
			sw.notify(absPath, "missing", names)
			return
		}
		expectedSize := sw.sizes[absPath]
		if info.Size() != expectedSize {
			// Size changed — definitely corrupted, disable immediately
			sw.logFn("source-watch: source file size changed (%d → %d), disabling: %s (affects: %v)",
				expectedSize, info.Size(), absPath, names)
			for _, f := range affected {
				f.Disable()
			}
			sw.notify(absPath, "size_changed", names)
			return
		}
		// Size matches — verify checksum in background. File remains
		// accessible during verification; only disabled on mismatch.
		if sw.checksumPending[absPath] {
			return // Already queued
		}
		sw.logFn("source-watch: source file modified, verifying checksum: %s (affects: %v)", absPath, names)
		// Copy the affected slice: the request outlives the lock, and a
		// later Update() may replace sw.reverse while the worker runs.
		affectedCopy := make([]*MKVFile, len(affected))
		copy(affectedCopy, affected)
		select {
		case sw.checksumCh <- checksumRequest{
			absPath:          absPath,
			expectedChecksum: sw.checksums[absPath],
			expectedSize:     expectedSize,
			affected:         affectedCopy,
			gen:              sw.updateGen,
		}:
			sw.checksumPending[absPath] = true
		default:
			// Queue full — disable as a safety measure
			sw.logFn("source-watch: checksum queue full, disabling: %s (affects: %v)", absPath, names)
			for _, f := range affected {
				f.Disable()
			}
			sw.notify(absPath, "checksum_queue_full", names)
		}
	}
}
// checksumWorker processes checksum verification requests sequentially.
// Only one goroutine runs this, ensuring that bulk source changes don't
// spawn hundreds of parallel I/O-heavy hash operations.
func (sw *SourceWatcher) checksumWorker() {
	defer sw.wg.Done()
	for {
		select {
		case req := <-sw.checksumCh:
			// Clear pending flag so new events for this path get queued.
			// This must happen before verification so that changes during
			// hashing trigger a fresh verification.
			sw.mu.Lock()
			delete(sw.checksumPending, req.absPath)
			// A generation mismatch means Update() ran after this request
			// was queued; its checksums/sizes may no longer apply.
			stale := req.gen != sw.updateGen
			sw.mu.Unlock()
			if stale {
				continue // Config was reloaded; skip stale request
			}
			sw.verifyChecksum(req.absPath, req.expectedChecksum, req.expectedSize, req.affected, req.gen)
		case <-sw.stopCh:
			return
		}
	}
}
// verifyChecksum re-hashes a source file in the background. Files remain
// accessible during verification. If the checksum mismatches, affected
// virtual files are disabled (recoverable via SIGHUP reload or a
// subsequent successful checksum). The gen parameter is checked before
// disabling or enabling so that a reload during verification prevents
// stale results from affecting files in the new configuration.
func (sw *SourceWatcher) verifyChecksum(absPath string, expectedChecksum uint64, expectedSize int64, affected []*MKVFile, gen uint64) {
	names := make([]string, len(affected))
	for i, f := range affected {
		names[i] = f.Name
	}
	// disableIfCurrent disables affected files only if the watcher
	// generation hasn't changed (i.e., no reload occurred during verification).
	disableIfCurrent := func() {
		sw.mu.RLock()
		stale := gen != sw.updateGen
		sw.mu.RUnlock()
		if stale {
			sw.logFn("source-watch: checksum: skipping disable for %s (config reloaded during verification)", absPath)
			return
		}
		for _, f := range affected {
			f.Disable()
		}
	}
	// Re-check size — it may have changed since the event was queued
	info, err := os.Stat(absPath)
	if err != nil {
		sw.logFn("source-watch: checksum: cannot stat %s: %v — disabling %v", absPath, err, names)
		disableIfCurrent()
		sw.notify(absPath, "missing", names)
		return
	}
	if info.Size() != expectedSize {
		sw.logFn("source-watch: checksum: size changed for %s (%d → %d) — disabling %v",
			absPath, expectedSize, info.Size(), names)
		disableIfCurrent()
		sw.notify(absPath, "size_changed", names)
		return
	}
	// Full xxhash checksum
	f, err := os.Open(absPath)
	if err != nil {
		sw.logFn("source-watch: checksum: cannot open %s: %v — disabling %v", absPath, err, names)
		disableIfCurrent()
		sw.notify(absPath, "missing", names)
		return
	}
	defer f.Close()
	h := xxhash.New()
	buf := make([]byte, 1<<20) // 1MB buffer
	for {
		// Check for shutdown between reads so large-file hashing
		// doesn't block Stop() indefinitely.
		select {
		case <-sw.stopCh:
			return
		default:
		}
		n, readErr := f.Read(buf)
		if n > 0 {
			h.Write(buf[:n])
		}
		if readErr != nil {
			if readErr != io.EOF {
				sw.logFn("source-watch: checksum: read error for %s: %v — disabling %v", absPath, readErr, names)
				disableIfCurrent()
				sw.notify(absPath, "read_error", names)
				return
			}
			break
		}
	}
	actualChecksum := h.Sum64()
	if actualChecksum != expectedChecksum {
		sw.logFn("source-watch: checksum mismatch for %s (got %016x, expected %016x) — disabling %v",
			absPath, actualChecksum, expectedChecksum, names)
		disableIfCurrent()
		sw.notify(absPath, "checksum_mismatch", names)
	} else {
		// Re-enable affected files so transient issues (e.g., network
		// glitches) auto-recover without requiring admin SIGHUP.
		//
		// NOTE: a virtual file can depend on multiple source files. A
		// passing checksum for one source could re-enable a file whose
		// other source is still bad. This is a known limitation; the
		// common case (single source per MKV) is handled correctly, and
		// SIGHUP is available as a fallback for multi-source edge cases.
		sw.mu.RLock()
		stale := gen != sw.updateGen
		sw.mu.RUnlock()
		if stale {
			sw.logFn("source-watch: checksum: skipping re-enable for %s (config reloaded during verification)", absPath)
			return
		}
		sw.logFn("source-watch: checksum verified OK for %s — re-enabling %v", absPath, names)
		for _, f := range affected {
			f.Enable()
		}
	}
}
//go:build linux
package fuse
import "golang.org/x/sys/unix"
// Filesystem type constants for network FS detection. These are the f_type
// magic numbers reported by statfs(2); compared against Statfs_t.Type in
// isNetworkFS below.
const (
	nfsSuperMagic   = 0x6969     // NFS
	cifsMagicNum    = 0xFF534D42 // CIFS/SMB1
	smb2MagicNum    = 0xFE534D42 // SMB2/SMB3
	afsSuper        = 0x5346414F // AFS
	ncpfsSuperMagic = 0x564C     // NCPFS (NetWare)
)
// IsNetworkFS checks if the given path is on a network filesystem.
// Exported for integration testing; internal callers use isNetworkFS.
func IsNetworkFS(path string) bool {
	return isNetworkFS(path)
}
// isNetworkFS reports whether path lives on a network filesystem, judged by
// the statfs(2) filesystem magic number. A failed statfs is treated as local.
func isNetworkFS(path string) bool {
	var fs unix.Statfs_t
	if unix.Statfs(path, &fs) != nil {
		// Can't determine — assume local
		return false
	}
	switch fs.Type {
	case nfsSuperMagic, cifsMagicNum, smb2MagicNum, afsSuper, ncpfsSuperMagic:
		return true
	default:
		return false
	}
}
// Package matcher provides the core deduplication logic for matching MKV packets to source files.
package matcher
import (
"fmt"
"io"
"os"
"runtime"
"strings"
"sync"
"sync/atomic"
"github.com/cespare/xxhash/v2"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/mmap"
"github.com/stuckj/mkvdup/internal/source"
)
// Tuning constants for the matching algorithm.
const (
	// MaxExpansionBytes is the maximum number of bytes to expand a match in each direction.
	// Set high to allow matching entire video keyframes which can be several MB.
	MaxExpansionBytes = 16 * 1024 * 1024 // 16MB
	// localityNearbyCount is the max number of nearby locations to try in Phase 1
	// of locality-aware matching before falling back to a full search.
	localityNearbyCount = 8
	// localityGoodMatchThreshold is the minimum match length (bytes) to accept
	// from a nearby location without trying all remaining locations.
	// At 4KB (64x the 64-byte window), a false positive is vanishingly unlikely.
	localityGoodMatchThreshold = 4096
	// phase2MaxVerifyAttempts caps the number of tryVerifyAndExpand calls in
	// Phase 2. Common audio patterns (e.g. DTS core headers) can produce the
	// same 64-byte hash across hundreds of source files. When none verify,
	// the uncapped scan causes I/O thrashing. 64 attempts is >10x the
	// observed average for successful Phase 2 searches (~6 locations).
	phase2MaxVerifyAttempts = 64
)
// detectNALLengthSize determines the NAL unit length field size from an MKV track's
// codec ID and codec private data. Returns 0 for Annex B (start code) formats,
// or the length field size (1, 2, or 4) for AVCC/HVCC formats.
//
// Both the AVC and HEVC paths now validate that the declared size is 1, 2, or
// 4 bytes: a 3-byte length field (lengthSizeMinusOne == 2) is reserved in the
// AVC/HEVC decoder configuration records, so a declaration of 3 falls back to
// the default of 4. Previously only the HEVC path performed this validation.
func detectNALLengthSize(codecID string, codecPrivate []byte) int {
	switch codecID {
	case "V_MPEG4/ISO/AVC":
		// AVCC format: CodecPrivate is AVCDecoderConfigurationRecord
		// Byte 0 = configurationVersion (must be 1)
		// Byte 4 bits 0-1 = NAL length size - 1
		if len(codecPrivate) >= 7 && codecPrivate[0] == 1 {
			size := int(codecPrivate[4]&0x03) + 1
			// Valid NAL length sizes are 1, 2, or 4 bytes (3 is reserved)
			if size == 1 || size == 2 || size == 4 {
				return size
			}
		}
		return 4 // Default for AVC if CodecPrivate is missing or malformed
	case "V_MPEGH/ISO/HEVC":
		// HVCC format: CodecPrivate is HEVCDecoderConfigurationRecord
		// Byte 0 = configurationVersion (must be 1)
		// Byte 21 bits 6-7 = reserved (must be 111111)
		// Byte 21 bits 0-1 = NAL length size - 1
		if len(codecPrivate) >= 23 && codecPrivate[0] == 1 {
			b := codecPrivate[21]
			// Upper 6 bits must be all 1s per ISO/IEC 23008-2
			if b&0xFC == 0xFC {
				size := int(b&0x03) + 1
				// Valid NAL length sizes are 1, 2, or 4 bytes
				if size == 1 || size == 2 || size == 4 {
					return size
				}
			}
		}
		return 4 // Default for HEVC if CodecPrivate is missing or malformed
	default:
		return 0 // Annex B format (MPEG-2, etc.)
	}
}
// NALLengthSizeForTrack returns the NAL length size for a track, suitable for
// use by external callers like ExtractProbeHashes. Returns 0 for Annex B.
// Thin exported wrapper around detectNALLengthSize.
func NALLengthSizeForTrack(codecID string, codecPrivate []byte) int {
	return detectNALLengthSize(codecID, codecPrivate)
}
// matchedRegion tracks a region of the MKV file that was matched to a source.
type matchedRegion struct {
	mkvStart         int64  // start offset of the region in the MKV file
	mkvEnd           int64  // end offset of the region in the MKV file
	fileIndex        uint16 // source file index within the source index
	srcOffset        int64  // File offset or ES offset depending on source type
	isVideo          bool   // For ES-based sources
	audioSubStreamID byte   // For audio in MPEG-PS
	isLPCM           bool   // True if this is an LPCM audio region requiring inverse transform
}

// coverageChunkSize is the granularity for coverage tracking.
// Smaller values give more accurate coverage checks but use more memory.
const coverageChunkSize = 4096 // 4KB chunks
// trackCodecInfo stores per-track codec information for format-aware matching.
type trackCodecInfo struct {
	trackType     int // MKV track type (video/audio/...)
	nalLengthSize int // 0 = Annex B (start codes), 1/2/4 = AVCC/HVCC (length-prefixed NAL units)
}

// trackCrossPacketHint stores per-track locality state for cross-packet
// handoff. Protected by a mutex to avoid torn reads when multiple
// goroutines process different packets on the same track concurrently.
// Read once at packet start, written once after the last match in a packet.
type trackCrossPacketHint struct {
	mu      sync.Mutex
	valid   bool   // false until the first match on this track
	fileIdx uint16 // source file of the last match
	offset  int64  // Midpoint of last matched source region (for Phase 1 hash locality)
	srcEnd  int64  // End of last matched source region (for locality recovery)
	mkvEnd  int64  // End of last matched MKV region (for locality recovery)
}

// packetLocality tracks per-packet locality state for deterministic
// intra-packet matching. Updated sequentially by a single goroutine,
// eliminating torn reads from shared state.
type packetLocality struct {
	valid   bool   // false until the first match in this packet
	fileIdx uint16 // source file of the last match
	offset  int64  // Midpoint of last match (for Phase 1)
	srcEnd  int64  // End of last matched source region
	mkvEnd  int64  // End of last matched MKV region
}
// Matcher performs the deduplication matching: it memory-maps an MKV file
// and matches its packets against a source.Index, recording matched regions
// and extensive diagnostic counters along the way. See Match for the
// top-level flow.
type Matcher struct {
	sourceIndex     *source.Index
	mkvMmap         *mmap.File
	mkvData         []byte // Zero-copy mmap'd MKV data
	mkvSize         int64
	windowSize      int // hash window size, taken from the source index
	matchedRegions  []matchedRegion
	regionsMu       sync.Mutex             // Protects matchedRegions for concurrent access
	trackTypes      map[int]int            // Map from track number to track type
	trackCodecs     map[int]trackCodecInfo // Map from track number to codec info
	numWorkers      int                    // Number of worker goroutines for parallel matching
	verboseWriter   io.Writer              // Destination for diagnostic output (nil = disabled)
	isAVCTrack      map[int]bool           // Per-track: whether this track uses H.264 NAL types
	isPCMTrack      map[int]bool           // Per-track: whether this track uses PCM audio (A_PCM/*)
	isTrueHDTrack   map[int]bool           // Per-track: whether this track uses TrueHD audio (A_TRUEHD)
	// Coverage bitmap for O(1) coverage checks. Each bit represents a chunk.
	// A chunk is marked covered when a matched region fully contains it.
	coveredChunks []uint64 // Bitmap: bit i = chunk i is covered
	coverageMu    sync.RWMutex
	// Per-track locality hints. Each track gets its own hint so interleaved
	// packets from different tracks (e.g. multiple DTS streams) don't thrash
	// a single shared hint. Created in Match() before workers start; the map
	// itself is read-only during matching, each hint is mutex-synchronized.
	trackHints map[uint64]*trackCrossPacketHint
	// Diagnostic counters for investigating match failures
	diagVideoPacketsTotal       atomic.Int64 // Total video packets processed
	diagVideoPacketsCoverage    atomic.Int64 // Video packets skipped (coverage check)
	diagVideoNALsTotal          atomic.Int64 // Total video NAL sync points tried
	diagVideoNALsTooSmall       atomic.Int64 // NALs where window didn't fit
	diagVideoNALsHashNotFound   atomic.Int64 // NALs where hash wasn't in index
	diagVideoNALsVerifyFailed   atomic.Int64 // NALs where hash found but all verifications failed
	diagVideoNALsAllSkipped     atomic.Int64 // NALs where hash found but all locations skipped (e.g. isVideo mismatch)
	diagVideoNALsMatched        atomic.Int64 // NALs successfully matched
	diagVideoNALsMatchedBytes   atomic.Int64 // Total bytes from matched video NALs
	diagVideoNALsSkippedIsVideo atomic.Int64 // Locations skipped due to isVideo mismatch
	// Per-NAL-type diagnostics (H.264 NAL type = first byte & 0x1F)
	diagNALTypeNotFound [32]atomic.Int64 // hash not found, by NAL type
	diagNALTypeMatched  [32]atomic.Int64 // matched, by NAL type
	diagNALTypeTotal    [32]atomic.Int64 // total attempted, by NAL type
	// NAL size bucket diagnostics (video only)
	// Buckets: 0=<64, 1=64-127, 2=128-1023, 3=1K-32K, 4=32K+
	diagNALSizeMatched   [5]atomic.Int64
	diagNALSizeUnmatched [5]atomic.Int64
	// Phase 2 diagnostics (all track types)
	diagPhase2Fallbacks  atomic.Int64 // Times Phase 2 full search was triggered
	diagPhase2Locations  atomic.Int64 // Total locations checked in Phase 2
	diagPhase2EarlyExits atomic.Int64 // Times Phase 2 exited early (full-frame match found)
	diagPhase2Capped     atomic.Int64 // Times Phase 2 hit the verify attempt cap
	diagPhase1Skips      atomic.Int64 // Times Phase 2 was skipped (Phase 1 sufficient)
	diagTotalSyncPoints  atomic.Int64 // Total match attempts (all track types)
	// Locality recovery diagnostics
	diagLocalityAttempts     atomic.Int64 // Times locality recovery was attempted
	diagLocalityMatched      atomic.Int64 // Times locality recovery succeeded
	diagLocalityMatchedBytes atomic.Int64 // Total bytes recovered via locality
	// First few hash-not-found examples for debugging
	diagExamplesMu     sync.Mutex
	diagExamplesCount  int
	diagExamplesOutput []string
}
// nalSizeBucket maps a NAL size in bytes to a diagnostics bucket index.
// Buckets: 0=<64, 1=64-127, 2=128-1023, 3=1K-32K, 4=32K+
func nalSizeBucket(size int) int {
	upperBounds := [...]int{64, 128, 1024, 32768}
	for bucket, bound := range upperBounds {
		if size < bound {
			return bucket
		}
	}
	return 4
}
// NewMatcher creates a new Matcher backed by the given source index. The
// worker count defaults to half the CPU count (minimum one); override it
// with SetNumWorkers.
func NewMatcher(sourceIndex *source.Index) (*Matcher, error) {
	workers := runtime.NumCPU() / 2
	if workers < 1 {
		workers = 1
	}
	m := &Matcher{
		sourceIndex:   sourceIndex,
		windowSize:    sourceIndex.WindowSize,
		trackTypes:    map[int]int{},
		trackCodecs:   map[int]trackCodecInfo{},
		isAVCTrack:    map[int]bool{},
		isPCMTrack:    map[int]bool{},
		isTrueHDTrack: map[int]bool{},
		numWorkers:    workers,
	}
	return m, nil
}
// SetVerboseWriter sets the destination for diagnostic output during matching.
// Pass nil to disable verbose output. Not safe to call while Match is running.
func (m *Matcher) SetVerboseWriter(w io.Writer) {
	m.verboseWriter = w
}
// SetNumWorkers sets the number of worker goroutines used for parallel
// matching. Values below one are clamped to one.
func (m *Matcher) SetNumWorkers(n int) {
	if n >= 1 {
		m.numWorkers = n
		return
	}
	m.numWorkers = 1
}
// Close releases resources held by the Matcher — specifically the MKV memory
// map if one was opened by Match. Always returns nil.
func (m *Matcher) Close() error {
	if m.mkvMmap != nil {
		m.mkvMmap.Close()
	}
	return nil
}
// ProgressFunc is called to report matching progress: processedPackets out of
// totalPackets handled so far.
type ProgressFunc func(processedPackets, totalPackets int)
// Match processes an MKV file and matches packets to the source.
//
// mkvPath is memory-mapped for zero-copy access; packets and tracks come
// from the MKV demuxer. progress, if non-nil, is invoked periodically and
// once more at completion. Returns the match result with entries, delta
// writer and statistics, or an error on I/O / entry-building failure.
//
// Fixes applied: a mapping left over from a previous Match() call is now
// released before re-mapping (previously leaked, since Close() only unmaps
// the most recent handle), and the locality diagnostic counters are reset
// along with all the other per-run counters (previously they accumulated
// across runs).
func (m *Matcher) Match(mkvPath string, packets []mkv.Packet, tracks []mkv.Track, progress ProgressFunc) (*Result, error) {
	// Memory-map the MKV file for zero-copy access
	info, err := os.Stat(mkvPath)
	if err != nil {
		return nil, fmt.Errorf("stat MKV: %w", err)
	}
	m.mkvSize = info.Size()
	// Release any mapping left over from a previous Match() call so
	// repeated runs don't leak file mappings.
	if m.mkvMmap != nil {
		m.mkvMmap.Close()
		m.mkvMmap = nil
	}
	m.mkvMmap, err = mmap.Open(mkvPath)
	if err != nil {
		return nil, fmt.Errorf("mmap MKV: %w", err)
	}
	m.mkvData = m.mkvMmap.Data() // Store reference for zero-copy access
	// Reset per-run state in case Match() is called multiple times
	m.trackTypes = make(map[int]int)
	m.trackCodecs = make(map[int]trackCodecInfo)
	m.isAVCTrack = make(map[int]bool)
	m.isPCMTrack = make(map[int]bool)
	m.isTrueHDTrack = make(map[int]bool)
	m.diagVideoPacketsTotal.Store(0)
	m.diagVideoPacketsCoverage.Store(0)
	m.diagVideoNALsTotal.Store(0)
	m.diagVideoNALsTooSmall.Store(0)
	m.diagVideoNALsHashNotFound.Store(0)
	m.diagVideoNALsVerifyFailed.Store(0)
	m.diagVideoNALsAllSkipped.Store(0)
	m.diagVideoNALsMatched.Store(0)
	m.diagVideoNALsMatchedBytes.Store(0)
	m.diagVideoNALsSkippedIsVideo.Store(0)
	for i := range m.diagNALTypeNotFound {
		m.diagNALTypeNotFound[i].Store(0)
		m.diagNALTypeMatched[i].Store(0)
		m.diagNALTypeTotal[i].Store(0)
	}
	for i := range m.diagNALSizeMatched {
		m.diagNALSizeMatched[i].Store(0)
		m.diagNALSizeUnmatched[i].Store(0)
	}
	m.diagPhase2Fallbacks.Store(0)
	m.diagPhase2Locations.Store(0)
	m.diagPhase2EarlyExits.Store(0)
	m.diagPhase2Capped.Store(0)
	m.diagPhase1Skips.Store(0)
	m.diagTotalSyncPoints.Store(0)
	// Locality counters are read by the diagnostics dump below and updated
	// by tryLocalityMatch; reset them like every other per-run counter.
	m.diagLocalityAttempts.Store(0)
	m.diagLocalityMatched.Store(0)
	m.diagLocalityMatchedBytes.Store(0)
	m.diagExamplesMu.Lock()
	m.diagExamplesCount = 0
	m.diagExamplesOutput = nil
	m.diagExamplesMu.Unlock()
	// Initialize per-track locality hints so each track has its own hint.
	// Zero value of trackCrossPacketHint has valid == false, which is correct.
	m.trackHints = make(map[uint64]*trackCrossPacketHint, len(tracks))
	for _, t := range tracks {
		m.trackHints[t.Number] = &trackCrossPacketHint{}
	}
	// Build track type and codec info maps
	for _, t := range tracks {
		m.trackTypes[int(t.Number)] = t.Type
		nlSize := detectNALLengthSize(t.CodecID, t.CodecPrivate)
		m.trackCodecs[int(t.Number)] = trackCodecInfo{
			trackType:     t.Type,
			nalLengthSize: nlSize,
		}
		if t.Type == mkv.TrackTypeVideo && strings.HasPrefix(t.CodecID, "V_MPEG4/ISO/AVC") {
			m.isAVCTrack[int(t.Number)] = true
		}
		if t.Type == mkv.TrackTypeAudio && strings.HasPrefix(t.CodecID, "A_PCM/") {
			m.isPCMTrack[int(t.Number)] = true
		}
		if t.Type == mkv.TrackTypeAudio && t.CodecID == "A_TRUEHD" {
			m.isTrueHDTrack[int(t.Number)] = true
		}
	}
	// Reset matched regions with pre-allocated capacity
	// Most packets will match, so estimate capacity as number of packets
	m.matchedRegions = make([]matchedRegion, 0, len(packets))
	// Initialize coverage bitmap
	// Each uint64 holds 64 chunk bits, so we need (numChunks + 63) / 64 uint64s
	numChunks := (m.mkvSize + coverageChunkSize - 1) / coverageChunkSize
	m.coveredChunks = make([]uint64, (numChunks+63)/64)
	// Pre-sort source locations by offset to enable binary search for
	// locality-aware matching. One-time cost before concurrent access.
	m.sourceIndex.SortLocationsByOffset()
	// Set appropriate madvise hints for matching access patterns.
	m.sourceIndex.AdviseForMatching()
	result := &Result{
		TotalPackets: len(packets),
	}
	// Use parallel processing with worker pool
	result.MatchedPackets = m.matchParallel(packets, progress)
	if progress != nil {
		progress(len(packets), len(packets))
	}
	// Print diagnostic summary (verbose only)
	if m.verboseWriter != nil {
		w := m.verboseWriter
		fmt.Fprintf(w, "\n=== Video Matching Diagnostics ===\n")
		fmt.Fprintf(w, "Video packets total: %d\n", m.diagVideoPacketsTotal.Load())
		fmt.Fprintf(w, "Video packets skip-covered: %d\n", m.diagVideoPacketsCoverage.Load())
		fmt.Fprintf(w, "Video NALs total: %d\n", m.diagVideoNALsTotal.Load())
		fmt.Fprintf(w, "Video NALs too small: %d\n", m.diagVideoNALsTooSmall.Load())
		fmt.Fprintf(w, "Video NALs hash not found: %d\n", m.diagVideoNALsHashNotFound.Load())
		fmt.Fprintf(w, "Video NALs verify failed: %d\n", m.diagVideoNALsVerifyFailed.Load())
		fmt.Fprintf(w, "Video NALs all skipped: %d\n", m.diagVideoNALsAllSkipped.Load())
		fmt.Fprintf(w, "Video NALs matched: %d\n", m.diagVideoNALsMatched.Load())
		fmt.Fprintf(w, "Video NALs matched bytes: %d (%.2f MB)\n",
			m.diagVideoNALsMatchedBytes.Load(), float64(m.diagVideoNALsMatchedBytes.Load())/(1024*1024))
		fmt.Fprintf(w, "Video NALs isVideo skips: %d\n", m.diagVideoNALsSkippedIsVideo.Load())
		if len(m.isAVCTrack) > 0 {
			fmt.Fprintf(w, "\nPer-NAL-type breakdown (H.264, type: total / matched / not_found / miss%%):\n")
			nalTypeNames := map[byte]string{
				1: "non-IDR slice", 2: "slice A", 3: "slice B", 4: "slice C",
				5: "IDR slice", 6: "SEI", 7: "SPS", 8: "PPS", 9: "AUD", 12: "filler",
			}
			for i := 0; i < 32; i++ {
				total := m.diagNALTypeTotal[i].Load()
				if total == 0 {
					continue
				}
				matched := m.diagNALTypeMatched[i].Load()
				notFound := m.diagNALTypeNotFound[i].Load()
				name := nalTypeNames[byte(i)]
				if name == "" {
					name = "other"
				}
				fmt.Fprintf(w, "  type %2d (%14s): %8d / %8d / %8d (%.1f%% miss)\n",
					i, name, total, matched, notFound, float64(notFound)/float64(total)*100)
			}
		}
		// NAL size bucket breakdown
		nalSizeBucketNames := [5]string{"<64B", "64-127B", "128B-1KB", "1KB-32KB", "32KB+"}
		fmt.Fprintf(w, "\nVideo NAL size distribution (matched / unmatched):\n")
		for i := 0; i < 5; i++ {
			matched := m.diagNALSizeMatched[i].Load()
			unmatched := m.diagNALSizeUnmatched[i].Load()
			if matched > 0 || unmatched > 0 {
				fmt.Fprintf(w, "  %9s: %8d matched, %8d unmatched\n",
					nalSizeBucketNames[i], matched, unmatched)
			}
		}
		fmt.Fprintf(w, "\nTotal match attempts: %d\n", m.diagTotalSyncPoints.Load())
		fmt.Fprintf(w, "Phase 1 skips (Phase 2 avoided): %d\n", m.diagPhase1Skips.Load())
		fmt.Fprintf(w, "Phase 2 full-search fallbacks: %d\n", m.diagPhase2Fallbacks.Load())
		fmt.Fprintf(w, "Phase 2 total locations checked: %d\n", m.diagPhase2Locations.Load())
		fmt.Fprintf(w, "Phase 2 early exits: %d\n", m.diagPhase2EarlyExits.Load())
		fmt.Fprintf(w, "Phase 2 capped (hit %d limit): %d\n", phase2MaxVerifyAttempts, m.diagPhase2Capped.Load())
		fmt.Fprintf(w, "\nLocality recovery:\n")
		fmt.Fprintf(w, "  Attempts: %d\n", m.diagLocalityAttempts.Load())
		fmt.Fprintf(w, "  Matched:  %d\n", m.diagLocalityMatched.Load())
		fmt.Fprintf(w, "  Bytes:    %d\n", m.diagLocalityMatchedBytes.Load())
		fmt.Fprintf(w, "\nFirst hash-not-found examples:\n")
		for _, ex := range m.diagExamplesOutput {
			fmt.Fprintf(w, "%s\n", ex)
		}
		fmt.Fprintf(w, "=================================\n")
	}
	// Fill TrueHD gaps using adjacent matched regions
	m.fillTrueHDGaps(packets)
	// Merge overlapping regions and build final entries
	m.mergeRegions()
	var buildErr error
	result.Entries, result.DeltaFile, buildErr = m.buildEntries()
	if buildErr != nil {
		return nil, fmt.Errorf("build entries: %w", buildErr)
	}
	// Calculate statistics
	for _, e := range result.Entries {
		if e.Source == 0 {
			result.UnmatchedBytes += e.Length
		} else {
			result.MatchedBytes += e.Length
		}
	}
	return result, nil
}
// ProbeHash represents a hash computed from a sync point in packet data.
type ProbeHash struct {
	// Hash is the xxhash64 of windowSize bytes starting at the sync point
	// (see ExtractProbeHashes).
	Hash uint64
	// IsVideo records whether the hash was taken from a video track's data,
	// mirroring the isVideo argument given to ExtractProbeHashes.
	IsVideo bool
}
// ExtractProbeHashes extracts probe hashes from packet data using sync point detection.
// This is the same algorithm used by the matcher to find matching points.
// The data should be the first few KB of a packet (typically up to 4096 bytes).
// windowSize should match the source index window size (typically 64 bytes).
// nalLengthSize is 0 for Annex B video, or 1/2/4 for AVCC/HVCC video.
// Returns nil if no valid hashes could be extracted.
func ExtractProbeHashes(data []byte, isVideo bool, windowSize int, nalLengthSize int) []ProbeHash {
	if len(data) < windowSize {
		return nil
	}
	// Locate candidate sync points with the codec-appropriate scanner.
	var starts []int
	switch {
	case isVideo && nalLengthSize > 0:
		starts = source.FindAVCCNALStarts(data, nalLengthSize)
	case isVideo:
		starts = source.FindVideoNALStarts(data)
	default:
		starts = source.FindAudioSyncPoints(data)
	}
	// Hash a full window at each sync point that leaves room for one.
	var result []ProbeHash
	for _, off := range starts {
		if off+windowSize > len(data) {
			continue
		}
		result = append(result, ProbeHash{
			Hash:    xxhash.Sum64(data[off : off+windowSize]),
			IsVideo: isVideo,
		})
	}
	if len(result) > 0 {
		return result
	}
	// No usable sync point — fall back to a single hash from the data start.
	return []ProbeHash{{
		Hash:    xxhash.Sum64(data[:windowSize]),
		IsVideo: isVideo,
	}}
}
package matcher
import (
"fmt"
"sort"
)
// mergeRegions merges overlapping matched regions.
// Regions from the same source with consistent offset mappings are merged into one.
// Overlapping regions from different sources (or inconsistent offsets) are clipped:
// the earlier region keeps its full range, the later region is trimmed to start
// after the earlier one ends.
func (m *Matcher) mergeRegions() {
	regions := m.matchedRegions
	if len(regions) == 0 {
		return
	}
	// Order regions by their MKV start offset before merging.
	sort.Slice(regions, func(a, b int) bool {
		return regions[a].mkvStart < regions[b].mkvStart
	})
	// The result holds at most len(regions) entries; pre-allocate capacity.
	out := make([]matchedRegion, 1, len(regions))
	out[0] = regions[0]
	for _, r := range regions[1:] {
		prev := &out[len(out)-1]
		if r.mkvStart >= prev.mkvEnd {
			// Disjoint — keep as a new region.
			out = append(out, r)
			continue
		}
		// Overlap. Two regions are coalescible when they reference the same
		// source file/stream and map the shared MKV bytes to the same source
		// bytes (identical offset arithmetic).
		contiguous := r.fileIndex == prev.fileIndex &&
			r.srcOffset == prev.srcOffset+(r.mkvStart-prev.mkvStart) &&
			r.isVideo == prev.isVideo &&
			r.audioSubStreamID == prev.audioSubStreamID
		switch {
		case contiguous:
			// Both regions were independently verified; extending the earlier
			// one preserves a correct combined mapping.
			if r.mkvEnd > prev.mkvEnd {
				prev.mkvEnd = r.mkvEnd
			}
		case r.mkvEnd > prev.mkvEnd:
			// Conflicting mapping: the earlier region wins. Trim r to begin
			// where prev ends, shifting its source offset by the same amount.
			shift := prev.mkvEnd - r.mkvStart
			r.mkvStart = prev.mkvEnd
			r.srcOffset += shift
			// Keep only non-empty remainders.
			if r.mkvStart < r.mkvEnd {
				out = append(out, r)
			}
		}
		// A conflicting region fully inside prev contributes nothing — drop it.
	}
	m.matchedRegions = out
}
// buildEntries creates the final entry list and streams delta data to a temp file.
// It walks the MKV byte range from start to end, emitting one entry per
// matched region and one entry per unmatched gap; gap bytes are copied from
// the mmap'd MKV into the delta writer.
func (m *Matcher) buildEntries() ([]Entry, *DeltaWriter, error) {
	out := make([]Entry, 0, len(m.matchedRegions)*2+1)
	dw, err := NewDeltaWriter()
	if err != nil {
		return nil, nil, err
	}
	var (
		deltaOff int64 // running offset into the delta file
		cursor   int64 // current position in the MKV file
		next     int   // index of the next unconsumed matched region
	)
	for cursor < m.mkvSize {
		// If the cursor sits inside the next matched region, emit a
		// source-backed entry covering the rest of that region.
		if next < len(m.matchedRegions) && m.matchedRegions[next].mkvStart <= cursor {
			r := &m.matchedRegions[next]
			if cursor >= r.mkvStart && cursor < r.mkvEnd {
				out = append(out, Entry{
					MkvOffset:        cursor,
					Length:           r.mkvEnd - cursor,
					Source:           uint16(r.fileIndex + 1),
					SourceOffset:     r.srcOffset + (cursor - r.mkvStart),
					IsVideo:          r.isVideo,
					AudioSubStreamID: r.audioSubStreamID,
					IsLPCM:           r.isLPCM,
				})
				cursor = r.mkvEnd
				next++
				continue
			}
		}
		// Otherwise we are in a gap: it runs to the next region (or EOF).
		gapEnd := m.mkvSize
		if next < len(m.matchedRegions) {
			gapEnd = m.matchedRegions[next].mkvStart
		}
		if gapEnd <= m.mkvSize {
			gapLen := gapEnd - cursor
			out = append(out, Entry{
				MkvOffset:    cursor,
				Length:       gapLen,
				Source:       0,
				SourceOffset: deltaOff,
			})
			// Stream the unmatched bytes straight from the mmap to the temp file.
			if err := dw.Write(m.mkvData[cursor:gapEnd]); err != nil {
				dw.Close()
				return nil, nil, fmt.Errorf("write delta: %w", err)
			}
			deltaOff += gapLen
		}
		cursor = gapEnd
	}
	if err := dw.Flush(); err != nil {
		dw.Close()
		return nil, nil, fmt.Errorf("flush delta: %w", err)
	}
	return out, dw, nil
}
package matcher
import (
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// expandChunkSize is the number of bytes to read at once during match expansion.
// Larger chunks reduce page faults when expanding across mmap'd source files.
// 4096 matches the common OS page size, so each chunk touches one page.
const expandChunkSize = 4096
// tryVerifyAndExpand attempts to verify and expand a match, returning the matched region or nil.
//
// pkt is the MKV packet containing the candidate sync point, loc is the
// source-index location whose hash matched, offsetInPacket is the sync
// point's byte offset within the packet, and isVideo marks the track type.
// It first compares up to windowSize bytes (clamped to the packet end)
// byte-for-byte, then expands the verified run in both directions via
// expandMatch. Returns nil when verification fails, the LPCM guard
// rejects the match, or the match length collapses to zero.
func (m *Matcher) tryVerifyAndExpand(pkt mkv.Packet, loc source.Location, offsetInPacket int64, isVideo bool) *matchedRegion {
	// The MKV offset where this sync point is
	mkvSyncOffset := pkt.Offset + offsetInPacket
	// Verify the initial match (at least windowSize bytes)
	verifyLen := int64(m.windowSize)
	remainingInPacket := pkt.Size - offsetInPacket
	if verifyLen > remainingInPacket {
		verifyLen = remainingInPacket
	}
	// Zero-copy: slice directly into mmap'd data
	endOffset := mkvSyncOffset + verifyLen
	if endOffset > m.mkvSize {
		return nil
	}
	mkvBuf := m.mkvData[mkvSyncOffset:endOffset]
	// Read source data - use ES reader for ES-based indexes, raw slice for zero-copy
	var srcBuf []byte
	var err error
	if m.sourceIndex.UsesESOffsets {
		srcBuf, err = m.sourceIndex.ReadESDataAt(loc, int(verifyLen))
		if err != nil || len(srcBuf) < int(verifyLen) {
			return nil
		}
	} else {
		// For raw indexes, use zero-copy slice
		srcBuf = m.sourceIndex.RawSlice(loc, int(verifyLen))
		if srcBuf == nil || len(srcBuf) < int(verifyLen) {
			return nil
		}
	}
	// Check if bytes match
	for i := range mkvBuf {
		if mkvBuf[i] != srcBuf[i] {
			return nil
		}
	}
	isLPCM := source.IsLPCMSubStreamID(loc.AudioSubStreamID)
	// Reject LPCM source matches when the MKV track is not PCM audio.
	// Without this check, coincidental byte-level matches between non-PCM
	// MKV data (e.g., AC3) and LPCM source data produce entries flagged
	// as LPCM. During reconstruction, the byte-swap transform is applied
	// to these entries, corrupting the output and causing verification failure.
	// Check before expansion to avoid unnecessary work.
	if isLPCM && !m.isPCMTrack[int(pkt.TrackNum)] {
		return nil
	}
	// Expand the match from the sync point
	mkvStart, srcStart, matchLen := m.expandMatch(
		mkvSyncOffset, loc, verifyLen,
	)
	// For LPCM entries, align boundaries to 2-byte sample pairs.
	// The byte-swap transform operates on pairs; an unpaired byte at either
	// end cannot be correctly swapped during FUSE reconstruction.
	if isLPCM && matchLen > 1 {
		// Odd source start: drop one leading byte on both sides to restore
		// pair alignment.
		if srcStart%2 == 1 {
			mkvStart++
			srcStart++
			matchLen--
		}
		// Odd length: drop the unpaired trailing byte.
		if matchLen%2 == 1 {
			matchLen--
		}
	}
	// The LPCM trimming above (or a degenerate expansion) can leave an
	// empty match; report no match in that case.
	if matchLen <= 0 {
		return nil
	}
	region := &matchedRegion{
		mkvStart:         mkvStart,
		mkvEnd:           mkvStart + matchLen,
		fileIndex:        loc.FileIndex,
		srcOffset:        srcStart,
		isVideo:          isVideo,
		audioSubStreamID: loc.AudioSubStreamID,
		isLPCM:           isLPCM,
	}
	return region
}
// expandMatch expands a verified match in both directions.
// It returns the (possibly moved) MKV start, source start, and total length
// after extending the initial run of initialLen matching bytes as far as the
// data allows.
func (m *Matcher) expandMatch(mkvOffset int64, loc source.Location, initialLen int64) (mkvStart, srcStart, length int64) {
	mkvStart, srcStart, length = mkvOffset, loc.Offset, initialLen
	// Determine how large the source stream is so expansion stays in bounds.
	var limit int64
	switch {
	case m.sourceIndex.UsesESOffsets && int(loc.FileIndex) < len(m.sourceIndex.ESReaders):
		if loc.IsVideo {
			limit = m.sourceIndex.ESReaders[loc.FileIndex].TotalESSize(true)
		} else {
			limit = m.sourceIndex.ESReaders[loc.FileIndex].AudioSubStreamESSize(loc.AudioSubStreamID)
		}
	case !m.sourceIndex.UsesESOffsets && int(loc.FileIndex) < len(m.sourceIndex.Files):
		limit = m.sourceIndex.Files[loc.FileIndex].Size
	}
	// Dispatch to the ES-aware or raw expansion strategy.
	if m.sourceIndex.UsesESOffsets {
		m.expandMatchES(mkvOffset, loc, limit, &mkvStart, &srcStart, &length)
	} else {
		m.expandMatchRaw(mkvOffset, loc, limit, &mkvStart, &srcStart, &length)
	}
	return mkvStart, srcStart, length
}
// expandMatchES expands a match using byte-by-byte ES reads with range hints.
// This is optimized for DVD MPEG-PS sources where ES data is non-contiguous.
// mkvStart/srcStart/length are in/out; each direction is capped at
// MaxExpansionBytes.
func (m *Matcher) expandMatchES(mkvOffset int64, loc source.Location, srcSize int64, mkvStart, srcStart, length *int64) {
	// Walk backward one byte at a time while MKV and source agree.
	hint := -1
	var grown int64
	for *mkvStart > 0 && *srcStart > 0 && grown < MaxExpansionBytes {
		srcByte, nextHint, ok := m.sourceIndex.ReadESByteWithHint(source.Location{
			FileIndex:        loc.FileIndex,
			Offset:           *srcStart - 1,
			IsVideo:          loc.IsVideo,
			AudioSubStreamID: loc.AudioSubStreamID,
		}, hint)
		hint = nextHint
		if !ok || srcByte != m.mkvData[*mkvStart-1] {
			break
		}
		*mkvStart--
		*srcStart--
		*length++
		grown++
	}
	// Walk forward one byte at a time with a fresh range hint.
	hint = -1
	grown = 0
	mkvPos := *mkvStart + *length
	srcPos := *srcStart + *length
	for mkvPos < m.mkvSize && srcPos < srcSize && grown < MaxExpansionBytes {
		srcByte, nextHint, ok := m.sourceIndex.ReadESByteWithHint(source.Location{
			FileIndex:        loc.FileIndex,
			Offset:           srcPos,
			IsVideo:          loc.IsVideo,
			AudioSubStreamID: loc.AudioSubStreamID,
		}, hint)
		hint = nextHint
		if !ok || srcByte != m.mkvData[mkvPos] {
			break
		}
		mkvPos++
		srcPos++
		*length++
		grown++
	}
}
// expandMatchRaw expands a match using chunked reads from raw mmap'd source files.
// Reads 4KB chunks at a time to reduce page faults compared to byte-by-byte access.
//
// mkvStart/srcStart/length are in/out parameters describing the verified
// match; each direction of expansion is capped at MaxExpansionBytes.
// mkvOffset is unused here and kept only for signature parity with
// expandMatchES.
func (m *Matcher) expandMatchRaw(mkvOffset int64, loc source.Location, srcSize int64, mkvStart, srcStart, length *int64) {
	// Expand backward in chunks
	backwardExpanded := int64(0)
	for *mkvStart > 0 && *srcStart > 0 && backwardExpanded < MaxExpansionBytes {
		// Determine chunk size: clamp to the bytes available before the
		// current start in both files and to the remaining expansion budget.
		chunkLen := int64(expandChunkSize)
		if chunkLen > *srcStart {
			chunkLen = *srcStart
		}
		if chunkLen > *mkvStart {
			chunkLen = *mkvStart
		}
		if chunkLen > MaxExpansionBytes-backwardExpanded {
			chunkLen = MaxExpansionBytes - backwardExpanded
		}
		if chunkLen <= 0 {
			break
		}
		srcChunk := m.sourceIndex.RawSlice(source.Location{
			FileIndex: loc.FileIndex,
			Offset:    *srcStart - chunkLen,
		}, int(chunkLen))
		if len(srcChunk) == 0 {
			break
		}
		// Compare backwards through the chunk. RawSlice may return fewer
		// bytes than requested, so anchor on len(srcChunk), not chunkLen.
		mkvChunkStart := *mkvStart - int64(len(srcChunk))
		matched := int64(0)
		for i := len(srcChunk) - 1; i >= 0; i-- {
			if srcChunk[i] != m.mkvData[mkvChunkStart+int64(i)] {
				break
			}
			matched++
		}
		if matched == 0 {
			break
		}
		*mkvStart -= matched
		*srcStart -= matched
		*length += matched
		backwardExpanded += matched
		// A partial chunk match means a mismatching byte ended the run — stop.
		if matched < int64(len(srcChunk)) {
			break
		}
	}
	// Expand forward in chunks
	mkvEnd := *mkvStart + *length
	srcEnd := *srcStart + *length
	forwardExpanded := int64(0)
	for mkvEnd < m.mkvSize && srcEnd < srcSize && forwardExpanded < MaxExpansionBytes {
		// Clamp the chunk to the remaining bytes in both files and the budget.
		chunkLen := int64(expandChunkSize)
		if chunkLen > srcSize-srcEnd {
			chunkLen = srcSize - srcEnd
		}
		if chunkLen > m.mkvSize-mkvEnd {
			chunkLen = m.mkvSize - mkvEnd
		}
		if chunkLen > MaxExpansionBytes-forwardExpanded {
			chunkLen = MaxExpansionBytes - forwardExpanded
		}
		if chunkLen <= 0 {
			break
		}
		srcChunk := m.sourceIndex.RawSlice(source.Location{
			FileIndex: loc.FileIndex,
			Offset:    srcEnd,
		}, int(chunkLen))
		if len(srcChunk) == 0 {
			break
		}
		// Compare forward through the chunk
		matched := int64(0)
		for i := 0; i < len(srcChunk); i++ {
			if srcChunk[i] != m.mkvData[mkvEnd+int64(i)] {
				break
			}
			matched++
		}
		if matched == 0 {
			break
		}
		mkvEnd += matched
		srcEnd += matched
		*length += matched
		forwardExpanded += matched
		// Stop after a partial match — a mismatching byte ended the run.
		if matched < int64(len(srcChunk)) {
			break
		}
	}
}
package matcher
import (
"fmt"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// localityVerifyLen is the minimum number of bytes needed for a reliable
// locality-based match. NALs smaller than this are skipped.
const localityVerifyLen = 64
// alignSearchRange is the number of byte offsets to try in each direction
// when aligning the predicted source offset to the actual NAL header position.
// The prediction can be off by a few bytes due to AVCC vs Annex B framing
// differences (length prefix size vs start code size).
// With range 3, up to 7 candidate offsets (prediction ±3) are probed.
const alignSearchRange = 3
// minAlignBytes is the minimum number of leading bytes that must match
// to confirm alignment between MKV and source NAL data. Set to 4 to avoid
// false positives — H.264 NALs have predictable first 2 bytes (NAL type +
// first slice header byte), so 2 bytes is insufficient across 7 candidates.
const minAlignBytes = 4
// tryLocalityMatch attempts to recover a NAL that failed hash-based matching
// by using the per-track locality hint to predict the source location. This
// recovers NALs that the indexer missed during source indexing — the bytes
// exist in the source but were never hashed into the index.
//
// The approach compares leading bytes at nearby offsets to align the
// prediction, then verifies the full NAL matches byte-for-byte.
//
// Returns a normal matchedRegion, or nil if no match found.
//
// Fix applied: the alignment loop previously declared a source.Location
// named loc, shadowing the packetLocality parameter of the same name; the
// inner variable is renamed candLoc to remove the shadow.
func (m *Matcher) tryLocalityMatch(
	pkt mkv.Packet,
	syncOff int,
	mkvNALData []byte,
	loc packetLocality,
	nalSize int,
) *matchedRegion {
	// Only attempt if we have valid per-packet locality and a large enough NAL
	if !loc.valid || loc.srcEnd <= 0 || loc.mkvEnd <= 0 {
		return nil
	}
	if nalSize < localityVerifyLen || len(mkvNALData) < nalSize {
		return nil
	}
	// Predict approximate source ES offset. Within a single MKV packet,
	// NALs are packed sequentially, so the MKV offset delta closely
	// matches the source ES offset delta (differing only by framing:
	// AVCC length prefix vs Annex B start code, typically ±1 byte).
	// Across packets, MKV offsets include container overhead (cluster/block
	// headers, other tracks' data) that doesn't exist in the source ES,
	// making the prediction unreliable. Skip if the gap is too large.
	currentMkvOff := pkt.Offset + int64(syncOff)
	mkvDelta := currentMkvOff - loc.mkvEnd
	if mkvDelta < 0 || mkvDelta > int64(nalSize)*2 {
		return nil
	}
	predictedSrcOff := loc.srcEnd + mkvDelta
	if predictedSrcOff < 0 {
		return nil
	}
	// Count actual IO-probing attempts (after all early-exit guards)
	m.diagLocalityAttempts.Add(1)
	debugN := m.diagLocalityAttempts.Load()
	debug := m.verboseWriter != nil && debugN <= 10
	hintFileIndex := loc.fileIdx
	if debug {
		fmt.Fprintf(m.verboseWriter, "[locality#%d] mkvOff=%d nalSize=%d nalHdr=%02x predictedSrc=%d fileIdx=%d\n",
			debugN, currentMkvOff, nalSize, mkvNALData[0], predictedSrcOff, hintFileIndex)
	}
	// Try to align the predicted offset to the actual NAL header position
	// by comparing leading bytes. The prediction can be off by a few bytes
	// due to AVCC (4-byte length prefix) vs Annex B (3-4 byte start code)
	// framing differences. We try offsets around the prediction and look for
	// the first position where the initial bytes match the MKV NAL data.
	srcNALOffset := int64(-1)
	for delta := -alignSearchRange; delta <= alignSearchRange; delta++ {
		candidateOff := predictedSrcOff + int64(delta)
		if candidateOff < 0 {
			continue
		}
		candLoc := source.Location{
			FileIndex: hintFileIndex,
			Offset:    candidateOff,
			IsVideo:   true,
		}
		probe, err := m.sourceIndex.ReadESDataAt(candLoc, minAlignBytes)
		if err != nil || len(probe) < minAlignBytes {
			continue
		}
		// Check if the first minAlignBytes bytes match the MKV NAL data
		match := true
		for i := 0; i < minAlignBytes; i++ {
			if probe[i] != mkvNALData[i] {
				match = false
				break
			}
		}
		if match {
			srcNALOffset = candidateOff
			break
		}
	}
	if srcNALOffset < 0 {
		if debug {
			fmt.Fprintf(m.verboseWriter, "[locality#%d] alignment failed\n", debugN)
		}
		return nil
	}
	// Read source NAL data at the aligned offset and verify full match.
	srcLoc := source.Location{
		FileIndex: hintFileIndex,
		Offset:    srcNALOffset,
		IsVideo:   true,
	}
	srcData, err := m.sourceIndex.ReadESDataAt(srcLoc, nalSize)
	if err != nil || len(srcData) < nalSize {
		if debug {
			fmt.Fprintf(m.verboseWriter, "[locality#%d] source read failed: err=%v len=%d need=%d\n", debugN, err, len(srcData), nalSize)
		}
		return nil
	}
	// Verify every byte matches
	for i := 0; i < nalSize; i++ {
		if mkvNALData[i] != srcData[i] {
			if debug {
				fmt.Fprintf(m.verboseWriter, "[locality#%d] mismatch at byte %d: src=%02x mkv=%02x\n", debugN, i, srcData[i], mkvNALData[i])
			}
			return nil
		}
	}
	// Success — exact match found via locality prediction.
	if debug {
		fmt.Fprintf(m.verboseWriter, "[locality#%d] exact match at srcOff=%d\n", debugN, srcNALOffset)
	}
	mkvStart := pkt.Offset + int64(syncOff)
	mkvEnd := mkvStart + int64(nalSize)
	m.diagLocalityMatched.Add(1)
	m.diagLocalityMatchedBytes.Add(int64(nalSize))
	return &matchedRegion{
		mkvStart:  mkvStart,
		mkvEnd:    mkvEnd,
		fileIndex: hintFileIndex,
		srcOffset: srcNALOffset,
		isVideo:   true,
	}
}
package matcher
import (
"fmt"
"sync"
"sync/atomic"
"github.com/cespare/xxhash/v2"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// computeNALSize computes the NAL/sync-unit or frame size from the sync point layout.
// For AVCC, consecutive sync points are separated by the length prefix of the
// next NAL. For Annex B video, sync points correspond to NAL/sync-unit boundaries
// (e.g. slice headers, sequence headers), not necessarily whole decoded frames;
// for audio and subtitles, consecutive sync points typically delimit frame boundaries.
// Returns (nalSize, exact). exact is true only when derived from a known next
// sync point; when false, nalSize is just the remaining data in the (possibly
// truncated) buffer and must not be used for short-circuit decisions.
func computeNALSize(syncPoints []int, i, syncOff, dataLen int, isVideo bool, nalLengthSize int) (int, bool) {
	if i+1 >= len(syncPoints) {
		// Last sync point: only the remaining buffer length is known, and the
		// buffer may be truncated, so the result is inexact.
		return dataLen - syncOff, false
	}
	next := syncPoints[i+1]
	if isVideo && nalLengthSize > 0 {
		// AVCC: the next sync point sits just past the next NAL's length
		// prefix, so subtract the prefix to get this NAL's payload size.
		return next - nalLengthSize - syncOff, true
	}
	return next - syncOff, true
}
// matchParallel processes packets in parallel using a worker pool.
// Progress is reported every 1000 processed packets; the return value is
// the number of packets for which matchPacketParallel succeeded.
func (m *Matcher) matchParallel(packets []mkv.Packet, progress ProgressFunc) int {
	var (
		done atomic.Int64 // packets processed so far
		hits atomic.Int64 // packets that matched
	)
	total := len(packets)
	jobs := make(chan mkv.Packet, m.numWorkers*2)
	var wg sync.WaitGroup
	wg.Add(m.numWorkers)
	for w := 0; w < m.numWorkers; w++ {
		go func() {
			defer wg.Done()
			for pkt := range jobs {
				if m.matchPacketParallel(pkt) {
					hits.Add(1)
				}
				if n := done.Add(1); progress != nil && n%1000 == 0 {
					progress(int(n), total)
				}
			}
		}()
	}
	// Feed the pool, then close to signal completion and wait for drain.
	for _, pkt := range packets {
		jobs <- pkt
	}
	close(jobs)
	wg.Wait()
	return int(hits.Load())
}
// matchPacketParallel is the thread-safe version of matchPacket.
// It scans one packet for codec-appropriate sync points, attempts hash-based
// (and, for video on ES indexes, locality-based) matching at each, records
// verified regions under regionsMu, and maintains the per-track cross-packet
// locality hint. Returns true if at least one region matched or the packet's
// byte range was already fully covered by earlier matches.
func (m *Matcher) matchPacketParallel(pkt mkv.Packet) bool {
	// Determine if this is video or audio
	trackType := m.trackTypes[int(pkt.TrackNum)]
	isVideo := trackType == mkv.TrackTypeVideo
	if isVideo {
		m.diagVideoPacketsTotal.Add(1)
	}
	// Check if this region is already covered by a matched region
	// Note: This is a relaxed check - we may miss some coverage due to race conditions,
	// but that's okay since we merge overlapping regions at the end anyway
	if m.isRangeCoveredParallel(pkt.Offset, pkt.Size) {
		if isVideo {
			m.diagVideoPacketsCoverage.Add(1)
		}
		return true
	}
	// Read packet data to find sync points (zero-copy slice access)
	readSize := pkt.Size
	if readSize < int64(m.windowSize) {
		return false
	}
	// For AVCC/HVCC video, use the full packet data. AVCC parsing is O(num_NALs)
	// not O(packet_size) — it reads 4-byte length fields and jumps, touching only
	// ~20 bytes for a typical frame with 5 NALs. Without this, large frames with
	// multiple slice NALs (common in 1080p Blu-ray) only match the first slice
	// since subsequent slices start past the truncated window.
	// For audio and Annex B video (linear scan), cap at 4096 to avoid waste.
	var useFullPacket bool
	if isVideo {
		codecInfo := m.trackCodecs[int(pkt.TrackNum)]
		if codecInfo.nalLengthSize > 0 {
			useFullPacket = true
		}
	}
	if !useFullPacket && readSize > 4096 {
		readSize = 4096
	}
	// Zero-copy: slice directly into mmap'd data
	endOffset := pkt.Offset + readSize
	if endOffset > m.mkvSize {
		endOffset = m.mkvSize
	}
	data := m.mkvData[pkt.Offset:endOffset]
	if len(data) < m.windowSize {
		return false
	}
	// Find sync points within the packet data
	var syncPoints []int
	codecInfo := m.trackCodecs[int(pkt.TrackNum)]
	if isVideo {
		if codecInfo.nalLengthSize > 0 {
			// AVCC/HVCC format: parse length-prefixed NAL units
			syncPoints = source.FindAVCCNALStarts(data, codecInfo.nalLengthSize)
		} else {
			// Annex B format: find NAL starts after 00 00 01
			syncPoints = source.FindVideoNALStarts(data)
		}
	} else if trackType == mkv.TrackTypeSubtitle {
		syncPoints = source.FindPGSSyncPoints(data)
	} else if m.isPCMTrack[int(pkt.TrackNum)] {
		syncPoints = source.FindLPCMMatchSyncPoints(data)
	} else {
		syncPoints = source.FindAudioSyncPoints(data)
	}
	// Read cross-packet hint once at packet start (mutex-protected, consistent snapshot).
	// NOTE(review): assumes Match() seeded m.trackHints for every track number
	// appearing in packets; a packet with an unknown track would nil-deref
	// here — confirm the demuxer guarantees this.
	crossHint := m.trackHints[pkt.TrackNum]
	var pktLoc packetLocality
	crossHint.mu.Lock()
	if crossHint.valid {
		pktLoc.valid = true
		pktLoc.fileIdx = crossHint.fileIdx
		pktLoc.offset = crossHint.offset
		pktLoc.srcEnd = crossHint.srcEnd
		pktLoc.mkvEnd = crossHint.mkvEnd
	}
	crossHint.mu.Unlock()
	// recordMatch handles bookkeeping for a successful match (hash-based
	// or locality-based): adds the region, marks coverage, updates state.
	// nalType is variadic so non-AVC callers can omit it.
	recordMatch := func(region *matchedRegion, nalSize int, nalType ...byte) {
		matchLen := region.mkvEnd - region.mkvStart
		m.regionsMu.Lock()
		m.matchedRegions = append(m.matchedRegions, *region)
		m.regionsMu.Unlock()
		m.markChunksCovered(region.mkvStart, region.mkvEnd)
		// Update per-packet locality (deterministic, goroutine-local)
		pktLoc.valid = true
		pktLoc.fileIdx = region.fileIndex
		pktLoc.offset = region.srcOffset + matchLen/2
		pktLoc.srcEnd = region.srcOffset + matchLen
		pktLoc.mkvEnd = region.mkvEnd
		if isVideo {
			m.diagVideoNALsMatched.Add(1)
			m.diagVideoNALsMatchedBytes.Add(matchLen)
			m.diagNALSizeMatched[nalSizeBucket(nalSize)].Add(1)
			if len(nalType) > 0 {
				m.diagNALTypeMatched[nalType[0]].Add(1)
			}
		}
	}
	// For AVCC/HVCC video, each NAL unit has different framing bytes than the
	// source (length prefix vs start code), so expansion stops at NAL boundaries.
	// We must match each NAL individually to cover the full packet.
	// For Annex B video (MPEG-2), expansion can cross start code boundaries
	// when the source data matches. However, shared structures like sequence
	// headers match many source locations with short expansions. We must
	// continue trying other sync points (e.g., slice headers) to find better
	// matches that cover the full packet.
	anyMatched := false
	for i, syncOff := range syncPoints {
		if syncOff+m.windowSize > len(data) {
			if isVideo {
				m.diagVideoNALsTooSmall.Add(1)
				m.diagNALSizeUnmatched[0].Add(1) // <64B bucket
			}
			continue
		}
		// Skip sync points whose chunk is already covered — the source data
		// for this region has already been verified byte-for-byte by a prior match.
		if m.isChunkCoveredParallel(pkt.Offset + int64(syncOff)) {
			continue
		}
		if isVideo {
			m.diagVideoNALsTotal.Add(1)
		}
		// Compute NAL/frame size from distance to next sync point.
		nalSize, nalSizeExact := computeNALSize(syncPoints, i, syncOff, len(data), isVideo, codecInfo.nalLengthSize)
		// H.264 NAL type diagnostics (other codecs use different type encodings)
		var nalType byte
		isAVC := isVideo && m.isAVCTrack[int(pkt.TrackNum)] && syncOff < len(data)
		if isAVC {
			// Low 5 bits of the first NAL byte are the H.264 nal_unit_type.
			nalType = data[syncOff] & 0x1F
			m.diagNALTypeTotal[nalType].Add(1)
		}
		// Hash-based matching (all codecs)
		var region *matchedRegion
		if isAVC {
			region = m.tryMatchFromOffsetParallel(pkt, int64(syncOff), data[syncOff:], isVideo, pktLoc, nalSize, nalSizeExact, nalType)
		} else {
			region = m.tryMatchFromOffsetParallel(pkt, int64(syncOff), data[syncOff:], isVideo, pktLoc, nalSize, nalSizeExact)
		}
		// Locality-based recovery for unmatched video NALs (all video codecs)
		if region == nil && isVideo && m.sourceIndex.UsesESOffsets && nalSizeExact {
			region = m.tryLocalityMatch(pkt, syncOff, data[syncOff:], pktLoc, nalSize)
		}
		if region != nil {
			if isAVC {
				recordMatch(region, nalSize, nalType)
			} else {
				recordMatch(region, nalSize)
			}
		} else if isVideo {
			m.diagNALSizeUnmatched[nalSizeBucket(nalSize)].Add(1)
		}
		if region != nil {
			anyMatched = true
			// Once the whole packet is covered there is nothing left to match.
			if m.isRangeCoveredParallel(pkt.Offset, pkt.Size) {
				break
			}
		}
	}
	// For Annex B video, if the first 4096 bytes didn't give full coverage,
	// scan the rest of the packet for additional sync points. This handles
	// cases where only shared structures (sequence headers) appear early
	// but unique slice data further in the packet would match.
	if isVideo && !useFullPacket && !m.isRangeCoveredParallel(pkt.Offset, pkt.Size) && pkt.Size > 4096 {
		fullEnd := pkt.Offset + pkt.Size
		if fullEnd > m.mkvSize {
			fullEnd = m.mkvSize
		}
		fullData := m.mkvData[pkt.Offset:fullEnd]
		moreSyncPoints := source.FindVideoNALStarts(fullData)
		for moreIdx, syncOff := range moreSyncPoints {
			if syncOff < int(readSize) {
				continue // Already tried in the first pass
			}
			if syncOff+m.windowSize > len(fullData) {
				continue
			}
			if m.isChunkCoveredParallel(pkt.Offset + int64(syncOff)) {
				continue
			}
			moreNALSize, moreNALSizeExact := computeNALSize(moreSyncPoints, moreIdx, syncOff, len(fullData), isVideo, codecInfo.nalLengthSize)
			region := m.tryMatchFromOffsetParallel(pkt, int64(syncOff), fullData[syncOff:], isVideo, pktLoc, moreNALSize, moreNALSizeExact)
			if region != nil {
				recordMatch(region, moreNALSize)
				anyMatched = true
				if m.isRangeCoveredParallel(pkt.Offset, pkt.Size) {
					break
				}
			}
		}
	}
	// Also try from packet start (in case it's already aligned)
	if !anyMatched {
		region := m.tryMatchFromOffsetParallel(pkt, 0, data, isVideo, pktLoc, len(data), false)
		if region != nil {
			recordMatch(region, len(data))
			anyMatched = true
		}
	}
	// Write back cross-packet hint (mutex-protected, consistent snapshot)
	if pktLoc.valid {
		crossHint.mu.Lock()
		crossHint.valid = true
		crossHint.fileIdx = pktLoc.fileIdx
		crossHint.offset = pktLoc.offset
		crossHint.srcEnd = pktLoc.srcEnd
		crossHint.mkvEnd = pktLoc.mkvEnd
		crossHint.mu.Unlock()
	}
	return anyMatched
}
// tryMatchFromOffsetParallel attempts hash-based matching for a NAL at the given
// offset. Returns the matched region or nil. The caller handles bookkeeping
// (adding to matchedRegions, marking coverage, updating locality state).
//
// Uses two-phase locality-aware matching:
//   - Phase 1: If packet locality exists, try the closest hash locations first.
//   - Phase 2: Fall back to trying all remaining locations.
//
// nalSize/nalSizeExact describe the NAL starting at data[0] and drive the
// "good enough, stop searching" early exits. nalType optionally carries the
// NAL unit type byte, used only for diagnostics. Safe to call from worker
// goroutines: the source index is read-only, diag counters use Add (atomic),
// and the shared example buffer is guarded by diagExamplesMu.
func (m *Matcher) tryMatchFromOffsetParallel(pkt mkv.Packet, offsetInPacket int64, data []byte, isVideo bool, loc packetLocality, nalSize int, nalSizeExact bool, nalType ...byte) *matchedRegion {
	// Not enough bytes remaining to form a full hash window.
	if len(data) < m.windowSize {
		return nil
	}
	m.diagTotalSyncPoints.Add(1)
	window := data[:m.windowSize]
	hash := xxhash.Sum64(window)
	// Look up in source index (read-only, thread-safe)
	locations := m.sourceIndex.Lookup(hash)
	if len(locations) == 0 {
		// Hash miss: record diagnostics (video only) and bail out.
		if isVideo {
			m.diagVideoNALsHashNotFound.Add(1)
			if len(nalType) > 0 {
				m.diagNALTypeNotFound[nalType[0]].Add(1)
			}
			// Capture first 20 examples
			if len(nalType) > 0 {
				m.diagExamplesMu.Lock()
				if m.diagExamplesCount < 20 {
					m.diagExamplesCount++
					example := fmt.Sprintf(" NAL type=%d, pktOff=%d, syncOff=%d, nalSize=%d, hash=%016x, first8bytes=%02x",
						nalType[0], pkt.Offset, offsetInPacket, nalSize, hash, data[:min(8, len(data))])
					m.diagExamplesOutput = append(m.diagExamplesOutput, example)
				}
				m.diagExamplesMu.Unlock()
			}
		}
		return nil
	}
	var bestMatch *matchedRegion
	bestMatchLen := int64(0)
	triedVerify := false // whether any tryVerifyAndExpand was called
	// Track which location indices were tried in Phase 1 (small fixed-size array)
	var triedIndices [localityNearbyCount]int
	triedCount := 0
	// Phase 1: Locality-aware search — try nearby locations first (per-packet locality)
	if loc.valid && len(locations) > 1 {
		nearby := nearbyLocationIndices(locations, loc.fileIdx, loc.offset, localityNearbyCount)
		for _, idx := range nearby {
			triedIndices[triedCount] = idx
			triedCount++
			l := locations[idx]
			// ES-offset indexes tag each location video/audio; only same-kind
			// locations can match this NAL.
			if m.sourceIndex.UsesESOffsets && l.IsVideo != isVideo {
				if isVideo {
					m.diagVideoNALsSkippedIsVideo.Add(1)
				}
				continue
			}
			triedVerify = true
			region := m.tryVerifyAndExpand(pkt, l, offsetInPacket, isVideo)
			if region != nil {
				matchLen := region.mkvEnd - region.mkvStart
				if matchLen > bestMatchLen {
					bestMatch = region
					bestMatchLen = matchLen
				}
				// Early exit when the match is good enough: clears the
				// locality threshold, or spans the whole (exact-size) NAL.
				if bestMatchLen >= localityGoodMatchThreshold || (nalSizeExact && nalSize >= m.windowSize && bestMatchLen >= int64(nalSize)) {
					break
				}
			}
		}
	}
	// Phase 2: Full search of remaining locations
	phase2Skipped := bestMatchLen >= localityGoodMatchThreshold || (nalSizeExact && nalSize >= m.windowSize && bestMatchLen >= int64(nalSize))
	if phase2Skipped {
		m.diagPhase1Skips.Add(1)
	}
	if !phase2Skipped {
		m.diagPhase2Fallbacks.Add(1)
		verifyAttempts := 0
		for i, l := range locations {
			// Skip anything already tried in Phase 1 (linear scan is fine:
			// triedCount is bounded by localityNearbyCount).
			alreadyTried := false
			for t := 0; t < triedCount; t++ {
				if triedIndices[t] == i {
					alreadyTried = true
					break
				}
			}
			if alreadyTried {
				continue
			}
			if m.sourceIndex.UsesESOffsets && l.IsVideo != isVideo {
				if isVideo {
					m.diagVideoNALsSkippedIsVideo.Add(1)
				}
				continue
			}
			triedVerify = true
			verifyAttempts++
			m.diagPhase2Locations.Add(1)
			region := m.tryVerifyAndExpand(pkt, l, offsetInPacket, isVideo)
			if region != nil {
				matchLen := region.mkvEnd - region.mkvStart
				if matchLen > bestMatchLen {
					bestMatch = region
					bestMatchLen = matchLen
				}
				// Same early-exit criterion as Phase 1.
				if bestMatchLen >= localityGoodMatchThreshold || (nalSizeExact && nalSize >= m.windowSize && bestMatchLen >= int64(nalSize)) {
					m.diagPhase2EarlyExits.Add(1)
					break
				}
			}
			// Cap verify attempts per sync point so pathological hash
			// collisions cannot stall a worker.
			if verifyAttempts >= phase2MaxVerifyAttempts {
				m.diagPhase2Capped.Add(1)
				break
			}
		}
	}
	if bestMatch != nil {
		return bestMatch
	}
	// No match: record why (verification failed vs. all candidates skipped).
	if isVideo {
		if triedVerify {
			m.diagVideoNALsVerifyFailed.Add(1)
		} else {
			m.diagVideoNALsAllSkipped.Add(1)
		}
	}
	return nil
}
// nearbyLocationIndices returns up to maxCount indices into locations that are
// closest to hintOffset within the file identified by hintFileIndex. The
// locations slice must be pre-sorted by (FileIndex, Offset) via
// SortLocationsByOffset. The result is empty when the target file has no
// locations.
func nearbyLocationIndices(locations []source.Location, hintFileIndex uint16, hintOffset int64, maxCount int) []int {
	total := len(locations)
	if total == 0 {
		return nil
	}
	// Binary search for the first entry >= (hintFileIndex, hintOffset).
	low, high := 0, total
	for low < high {
		mid := low + (high-low)/2
		cand := locations[mid]
		isBefore := cand.FileIndex < hintFileIndex ||
			(cand.FileIndex == hintFileIndex && cand.Offset < hintOffset)
		if isBefore {
			low = mid + 1
		} else {
			high = mid
		}
	}
	absDist := func(d int64) int64 {
		if d < 0 {
			return -d
		}
		return d
	}
	// Two-pointer sweep outward from the insertion point, always taking the
	// side whose offset is nearer to the hint. Only entries belonging to the
	// target file qualify; ties go to the left (lower-offset) side.
	picked := make([]int, 0, maxCount)
	lower, upper := low-1, low
	for len(picked) < maxCount {
		haveLower := lower >= 0 && locations[lower].FileIndex == hintFileIndex
		haveUpper := upper < total && locations[upper].FileIndex == hintFileIndex
		switch {
		case haveLower && haveUpper:
			if absDist(hintOffset-locations[lower].Offset) <= absDist(locations[upper].Offset-hintOffset) {
				picked = append(picked, lower)
				lower--
			} else {
				picked = append(picked, upper)
				upper++
			}
		case haveLower:
			picked = append(picked, lower)
			lower--
		case haveUpper:
			picked = append(picked, upper)
			upper++
		default:
			// No candidates left in the target file.
			return picked
		}
	}
	return picked
}
// isRangeCoveredParallel reports whether every coverage chunk overlapped by
// [offset, offset+size) is marked in the coverage bitmap. The check works at
// chunk granularity, so it may report false positives when different regions
// cover adjacent chunks — acceptable because overlapping regions are merged
// at the end anyway.
func (m *Matcher) isRangeCoveredParallel(offset, size int64) bool {
	first := offset / coverageChunkSize
	last := (offset + size - 1) / coverageChunkSize
	m.coverageMu.RLock()
	defer m.coverageMu.RUnlock()
	// Every chunk in the range must have its bit set.
	for c := first; c <= last; c++ {
		word := c / 64
		bit := uint(c % 64)
		if word >= int64(len(m.coveredChunks)) {
			return false
		}
		if m.coveredChunks[word]&(1<<bit) == 0 {
			return false
		}
	}
	return true
}
// isChunkCoveredParallel reports whether the coverage chunk containing
// absOffset is already marked. Used to skip sync points that land inside
// already-matched regions, avoiding redundant hash lookups and source reads.
func (m *Matcher) isChunkCoveredParallel(absOffset int64) bool {
	c := absOffset / coverageChunkSize
	word := c / 64
	bit := uint(c % 64)
	m.coverageMu.RLock()
	covered := word < int64(len(m.coveredChunks)) && m.coveredChunks[word]&(1<<bit) != 0
	m.coverageMu.RUnlock()
	return covered
}
// markChunksCovered sets the coverage bits for every chunk that lies entirely
// inside [start, end). Partially-covered chunks at the edges are left
// unmarked so they can still attract future matches.
func (m *Matcher) markChunksCovered(start, end int64) {
	// Round start up and end down to whole-chunk boundaries.
	first := (start + coverageChunkSize - 1) / coverageChunkSize
	last := end/coverageChunkSize - 1
	if first > last {
		// The region does not fully contain any chunk.
		return
	}
	m.coverageMu.Lock()
	defer m.coverageMu.Unlock()
	words := int64(len(m.coveredChunks))
	for c := first; c <= last; c++ {
		if word := c / 64; word < words {
			m.coveredChunks[word] |= 1 << uint(c%64)
		}
	}
}
package matcher
import (
"bufio"
"fmt"
"os"
)
// Entry represents a region in the MKV file and where its data comes from.
// A slice of Entries covers the whole MKV byte range (see Result.Entries);
// each one says either "these bytes live in the delta" (Source == 0) or
// "these bytes can be read back out of a source file".
type Entry struct {
	MkvOffset        int64  // Start offset in the MKV file
	Length           int64  // Length of this region
	Source           uint16 // 0 = delta, 1+ = source file index + 1 (supports up to 65535 files)
	SourceOffset     int64  // Offset in source file (or ES offset for ES-based sources)
	IsVideo          bool   // For ES-based sources: whether this is video or audio data
	AudioSubStreamID byte   // For ES-based audio: sub-stream ID (0x80-0x87=AC3, etc.)
	IsLPCM           bool   // True if this is 16-bit LPCM audio requiring byte-swap on read
}
// Result contains the results of the matching process. Delta bytes live in
// exactly one backing store: DeltaData (in-memory) or DeltaFile (temp-file);
// use DeltaSize to query the size regardless of which is populated.
type Result struct {
	Entries        []Entry      // All entries covering the entire MKV file
	DeltaData      []byte       // Concatenated unique data (for small deltas / tests)
	DeltaFile      *DeltaWriter // File-backed delta data (for large files)
	MatchedBytes   int64        // Total bytes matched to source
	UnmatchedBytes int64        // Total bytes in delta
	MatchedPackets int          // Number of packets that matched
	TotalPackets   int          // Total number of packets processed
}
// DeltaSize returns the total number of delta bytes, whether they are held
// in memory (DeltaData) or spilled to a temp file (DeltaFile).
func (r *Result) DeltaSize() int64 {
	if r.DeltaFile == nil {
		return int64(len(r.DeltaData))
	}
	return r.DeltaFile.Size()
}
// Close releases resources held by the result, deleting any temp-file-backed
// delta data. Safe to call more than once.
func (r *Result) Close() {
	if dw := r.DeltaFile; dw != nil {
		r.DeltaFile = nil
		dw.Close()
	}
}
// DeltaWriter accumulates delta data in a temp file so large deltas do not
// pile up on the heap.
type DeltaWriter struct {
	file     *os.File      // backing temp file (nil once Close has run)
	buffered *bufio.Writer // buffers small writes before they hit disk
	size     int64         // running count of bytes accepted by Write
}

// NewDeltaWriter creates a DeltaWriter backed by a fresh temp file.
func NewDeltaWriter() (*DeltaWriter, error) {
	tmp, err := os.CreateTemp("", "mkvdup-delta-*")
	if err != nil {
		return nil, fmt.Errorf("create delta temp file: %w", err)
	}
	dw := &DeltaWriter{
		file:     tmp,
		buffered: bufio.NewWriterSize(tmp, 256*1024),
	}
	return dw, nil
}

// Write appends data to the delta file, tracking how many bytes were accepted.
func (dw *DeltaWriter) Write(data []byte) error {
	n, err := dw.buffered.Write(data)
	dw.size += int64(n)
	return err
}

// Flush forces all buffered data out to the temp file.
func (dw *DeltaWriter) Flush() error { return dw.buffered.Flush() }

// Size returns the total number of bytes written so far.
func (dw *DeltaWriter) Size() int64 { return dw.size }

// File exposes the backing temp file for reading; callers must Flush first.
func (dw *DeltaWriter) File() *os.File { return dw.file }

// Close deletes the backing temp file. Safe to call more than once.
func (dw *DeltaWriter) Close() {
	if dw.file == nil {
		return
	}
	path := dw.file.Name()
	dw.file.Close()
	os.Remove(path)
	dw.file = nil
}
package matcher
import (
"fmt"
"sort"
"github.com/stuckj/mkvdup/internal/mkv"
"github.com/stuckj/mkvdup/internal/source"
)
// fillTrueHDGaps fills unmatched gaps in TrueHD tracks by comparing MKV data
// with source ES data between existing matched regions.
//
// MKV A_TRUEHD packets contain pure TrueHD data (AC3 stripped by the muxer).
// The source parser independently strips AC3 from the Blu-ray interleaved
// stream, but may split at slightly different boundaries. This creates small
// "extra" byte regions in the MKV that aren't in the source ES, breaking
// expansion chains from sync-point matches and leaving large spans unmatched.
//
// Packets are bucketed per TrueHD track, sorted by MKV offset, and each
// track's gaps are filled independently via fillTrueHDTrackGaps.
func (m *Matcher) fillTrueHDGaps(packets []mkv.Packet) {
	byTrack := make(map[int][]mkv.Packet)
	for _, p := range packets {
		if tn := int(p.TrackNum); m.isTrueHDTrack[tn] {
			byTrack[tn] = append(byTrack[tn], p)
		}
	}
	for tn, list := range byTrack {
		sort.Slice(list, func(a, b int) bool {
			return list[a].Offset < list[b].Offset
		})
		m.fillTrueHDTrackGaps(tn, list)
	}
}
// fillTrueHDTrackGaps fills gaps for a single TrueHD track by finding
// existing matched regions on this track and filling the gaps between them.
// pkts must contain only this track's packets, sorted by Offset (the caller
// guarantees both).
func (m *Matcher) fillTrueHDTrackGaps(trackNum int, pkts []mkv.Packet) {
	if len(pkts) == 0 {
		return
	}
	// Binary search to find which packet contains a given MKV offset.
	// Returns -1 when the offset falls between packets (not TrueHD data).
	findPacketIdx := func(mkvOffset int64) int {
		lo, hi := 0, len(pkts)
		for lo < hi {
			mid := lo + (hi-lo)/2
			if pkts[mid].Offset+pkts[mid].Size <= mkvOffset {
				lo = mid + 1
			} else {
				hi = mid
			}
		}
		if lo < len(pkts) && mkvOffset >= pkts[lo].Offset && mkvOffset < pkts[lo].Offset+pkts[lo].Size {
			return lo
		}
		return -1
	}
	// findFirstPacketAt returns the index of the first packet whose end is > mkvOffset.
	findFirstPacketAt := func(mkvOffset int64) int {
		lo, hi := 0, len(pkts)
		for lo < hi {
			mid := lo + (hi-lo)/2
			if pkts[mid].Offset+pkts[mid].Size <= mkvOffset {
				lo = mid + 1
			} else {
				hi = mid
			}
		}
		return lo
	}
	// Collect matched regions that fall within this track's packets.
	// NOTE(review): takes the full Lock for a read-only scan — presumably
	// regionsMu is a plain Mutex; confirm before changing to RLock.
	m.regionsMu.Lock()
	var trackRegions []matchedRegion
	for _, r := range m.matchedRegions {
		if findPacketIdx(r.mkvStart) >= 0 {
			trackRegions = append(trackRegions, r)
		}
	}
	m.regionsMu.Unlock()
	// Gap-filling needs at least two anchors to define a gap between them.
	if len(trackRegions) < 2 {
		if m.verboseWriter != nil {
			fmt.Fprintf(m.verboseWriter, "\n[TrueHD gap-fill] track %d: only %d matched regions, need ≥2 for gap-fill\n",
				trackNum, len(trackRegions))
		}
		return
	}
	// Sort by mkvStart for sequential gap processing.
	sort.Slice(trackRegions, func(i, j int) bool {
		return trackRegions[i].mkvStart < trackRegions[j].mkvStart
	})
	if m.verboseWriter != nil {
		fmt.Fprintf(m.verboseWriter, "\n[TrueHD gap-fill] track %d: %d matched regions, fileIndex=%d, subStreamID=0x%02X\n",
			trackNum, len(trackRegions), trackRegions[0].fileIndex, trackRegions[0].audioSubStreamID)
	}
	// Fill gaps between adjacent matched regions.
	var newRegions []matchedRegion
	var totalFilledBytes, totalGapBytes, gapsFilled, gapsSkipped int64
	for i := 0; i < len(trackRegions)-1; i++ {
		prev := trackRegions[i]
		next := trackRegions[i+1]
		// Verify both regions use the same source
		if prev.fileIndex != next.fileIndex || prev.audioSubStreamID != next.audioSubStreamID {
			continue
		}
		// MKV gap: the unmatched bytes between the two anchor regions.
		gapMKVStart := prev.mkvEnd
		gapMKVEnd := next.mkvStart
		if gapMKVEnd <= gapMKVStart {
			continue
		}
		// Source ES gap: from end of prev's source range to start of next's source range
		prevSrcEnd := prev.srcOffset + (prev.mkvEnd - prev.mkvStart)
		nextSrcStart := next.srcOffset
		srcGapSize := nextSrcStart - prevSrcEnd
		// srcGapSize <= 0 means overlapping or backwards source offsets (invalid gap);
		// srcGapSize < 16 means too small to produce a meaningful match run.
		if srcGapSize <= 0 || srcGapSize < 16 {
			gapsSkipped++
			continue
		}
		// Collect TrueHD packet segments within the gap.
		// Only compare bytes within actual TrueHD packets, skipping
		// interleaved video/audio data from other tracks.
		startPkt := findFirstPacketAt(gapMKVStart)
		var segments []mkvSegment
		for p := startPkt; p < len(pkts) && pkts[p].Offset < gapMKVEnd; p++ {
			pkt := pkts[p]
			// Clip each packet to the gap (and to the file size).
			segStart := max(pkt.Offset, gapMKVStart)
			segEnd := min(pkt.Offset+pkt.Size, gapMKVEnd)
			if segEnd > m.mkvSize {
				segEnd = m.mkvSize
			}
			if segStart < segEnd {
				segments = append(segments, mkvSegment{segStart, segEnd})
				totalGapBytes += segEnd - segStart
			}
		}
		if len(segments) == 0 {
			gapsSkipped++
			continue
		}
		regions := m.fillTrueHDGapSegments(segments, prevSrcEnd, srcGapSize, prev.fileIndex, prev.audioSubStreamID)
		if len(regions) > 0 {
			newRegions = append(newRegions, regions...)
			gapsFilled++
			for _, r := range regions {
				totalFilledBytes += r.mkvEnd - r.mkvStart
			}
		}
	}
	// Add all new regions
	if len(newRegions) > 0 {
		m.regionsMu.Lock()
		m.matchedRegions = append(m.matchedRegions, newRegions...)
		m.regionsMu.Unlock()
		// Coverage marking happens outside regionsMu; markChunksCovered
		// takes its own coverage lock.
		for i := range newRegions {
			m.markChunksCovered(newRegions[i].mkvStart, newRegions[i].mkvEnd)
		}
	}
	if m.verboseWriter != nil {
		fmt.Fprintf(m.verboseWriter, "[TrueHD gap-fill] track %d: filled %d gaps (%d bytes, %.2f MB), %d gaps skipped, total TrueHD gap bytes=%d (%.2f MB)\n",
			trackNum, gapsFilled, totalFilledBytes, float64(totalFilledBytes)/(1024*1024),
			gapsSkipped, totalGapBytes, float64(totalGapBytes)/(1024*1024))
	}
}
// mkvSegment describes a contiguous range of MKV data to compare.
// start is inclusive, end exclusive, both absolute MKV file offsets.
type mkvSegment struct{ start, end int64 }
// fillTrueHDGapSegments fills a gap between two matched regions using greedy
// forward comparison across multiple MKV segments (TrueHD packet portions).
//
// The MKV may contain extra bytes (from AC3 splitting differences) that aren't
// in the source ES. When a mismatch occurs, the algorithm advances the MKV
// position by one byte while keeping the source position fixed, then retries.
// Matching runs of ≥16 bytes are recorded as new matched regions.
//
// srcStart/srcSize define the source ES byte range to compare against;
// fileIndex and subStreamID identify the source stream and are copied into
// every produced region.
func (m *Matcher) fillTrueHDGapSegments(
	segments []mkvSegment,
	srcStart, srcSize int64,
	fileIndex uint16, subStreamID byte,
) []matchedRegion {
	if srcSize <= 0 {
		return nil
	}
	// Read the source ES bytes covering the gap.
	loc := source.Location{
		FileIndex:        fileIndex,
		Offset:           srcStart,
		IsVideo:          false,
		AudioSubStreamID: subStreamID,
	}
	srcData, err := m.sourceIndex.ReadESDataAt(loc, int(srcSize))
	if err != nil || len(srcData) == 0 {
		return nil
	}
	const minRunLen = 16
	var regions []matchedRegion
	srcIdx := 0
	runMKVStart := int64(-1) // MKV offset where the current matching run began (-1 = no run)
	runSrcStart := -1        // source index where the current run began
	// flushRun records the pending run as [runMKVStart, runEnd) if it is long
	// enough, then clears the run state.
	flushRun := func(runEnd int64) {
		if runMKVStart < 0 {
			return
		}
		if runEnd-runMKVStart >= minRunLen {
			regions = append(regions, matchedRegion{
				mkvStart:         runMKVStart,
				mkvEnd:           runEnd,
				fileIndex:        fileIndex,
				srcOffset:        srcStart + int64(runSrcStart),
				isVideo:          false,
				audioSubStreamID: subStreamID,
			})
		}
		runMKVStart = -1
		runSrcStart = -1
	}
	// Walk each MKV segment (TrueHD packet data only).
	for _, seg := range segments {
		if srcIdx >= len(srcData) {
			break
		}
		if seg.end > m.mkvSize {
			continue
		}
		mkvData := m.mkvData[seg.start:seg.end]
		mkvOff := 0
		for mkvOff < len(mkvData) && srcIdx < len(srcData) {
			mkvAbsPos := seg.start + int64(mkvOff)
			if mkvData[mkvOff] == srcData[srcIdx] {
				if runMKVStart < 0 {
					runMKVStart = mkvAbsPos
					runSrcStart = srcIdx
				}
				mkvOff++
				srcIdx++
			} else {
				// Mismatch: flush any pending run, then skip one MKV byte
				// (an extra byte not present in the source ES) while keeping
				// the source position fixed.
				flushRun(mkvAbsPos)
				mkvOff++
			}
		}
		// End of segment (next segment starts at a different MKV offset) or
		// end of source data: flush the pending run at the last byte actually
		// compared. FIX: flushing at seg.end unconditionally (as before) was
		// wrong when the inner loop stopped early because srcData ran out —
		// it recorded MKV bytes that were never compared as matched.
		// seg.start+mkvOff equals seg.end whenever the whole segment was
		// consumed, so this is a strict tightening.
		flushRun(seg.start + int64(mkvOff))
	}
	return regions
}
// Package mkv provides functionality for parsing MKV (Matroska) files.
package mkv
import (
"encoding/binary"
"errors"
"fmt"
"io"
)
// EBML Element IDs (Matroska specification). Values include the VINT marker
// bits, matching what readVINT returns with keepMarker=true.
const (
	// EBML Header elements
	IDEBMLHeader      = 0x1A45DFA3
	IDEBMLVersion     = 0x4286
	IDEBMLReadVersion = 0x42F7
	IDEBMLMaxIDLength = 0x42F2
	IDEBMLMaxSizeLength = 0x42F3
	IDDocType        = 0x4282
	IDDocTypeVersion = 0x4287
	IDDocTypeReadVer = 0x4285
	// Segment and top-level elements
	IDSegment  = 0x18538067
	IDSeekHead = 0x114D9B74
	IDInfo     = 0x1549A966
	IDTracks   = 0x1654AE6B
	IDChapters = 0x1043A770
	IDCluster  = 0x1F43B675
	IDCues     = 0x1C53BB6B
	IDTags     = 0x1254C367
	// Cluster elements
	IDTimestamp   = 0xE7
	IDSimpleBlock = 0xA3
	IDBlockGroup  = 0xA0
	IDBlock       = 0xA1
	// Track elements
	IDTrackEntry   = 0xAE
	IDTrackNum     = 0xD7
	IDTrackUID     = 0x73C5
	IDTrackType    = 0x83
	IDCodecID      = 0x86
	IDCodecPrivate = 0x63A2
)
// Track types (values of the Matroska TrackType element).
const (
	TrackTypeVideo    = 1
	TrackTypeAudio    = 2
	TrackTypeComplex  = 3
	TrackTypeLogo     = 0x10
	TrackTypeSubtitle = 0x11
	TrackTypeButtons  = 0x12
	TrackTypeControl  = 0x20
)
// ErrInvalidEBML is returned when EBML parsing fails.
var ErrInvalidEBML = errors.New("invalid EBML data")

// Element represents a parsed EBML element header.
type Element struct {
	ID         uint64 // Element ID (variable length, marker bits preserved)
	Size       int64  // Element data size (-1 when the size is unknown)
	DataOffset int64  // Offset of the element's data in the file
	HeaderSize int    // Bytes consumed by the ID + size encoding
}

// ReadElementHeader reads an EBML element header (ID and size) from r, which
// must be positioned at file offset `offset`, and returns the decoded element.
func ReadElementHeader(r io.Reader, offset int64) (Element, error) {
	elem := Element{DataOffset: offset}
	// Element ID: a VINT whose marker bits are kept (EBML IDs include them).
	id, idLen, err := readVINT(r, true)
	if err != nil {
		return elem, fmt.Errorf("read element ID: %w", err)
	}
	elem.ID = id
	elem.HeaderSize = idLen
	// Element size: a VINT with the marker stripped.
	size, sizeLen, err := readVINT(r, false)
	if err != nil {
		return elem, fmt.Errorf("read element size: %w", err)
	}
	elem.HeaderSize += sizeLen
	if isUnknownSize(size, sizeLen) {
		// All data bits set is the reserved "unknown size" value.
		elem.Size = -1
	} else {
		elem.Size = int64(size)
	}
	elem.DataOffset = offset + int64(elem.HeaderSize)
	return elem, nil
}

// readVINT decodes an EBML variable-length integer. When keepMarker is true
// the length-marker bit is retained in the returned value (used for element
// IDs). Returns the value, the number of bytes consumed, and any read error.
func readVINT(r io.Reader, keepMarker bool) (uint64, int, error) {
	var lead [1]byte
	if _, err := io.ReadFull(r, lead[:]); err != nil {
		return 0, 0, err
	}
	first := lead[0]
	if first == 0 {
		// A zero first byte would encode a length beyond 8 bytes.
		return 0, 0, ErrInvalidEBML
	}
	// The position of the highest set bit gives the total length:
	// 1xxxxxxx = 1 byte, 01xxxxxx = 2 bytes, ..., 00000001 = 8 bytes.
	length := 1
	marker := byte(0x80)
	for first&marker == 0 {
		marker >>= 1
		length++
	}
	var value uint64
	if keepMarker {
		value = uint64(first)
	} else {
		value = uint64(first & (marker - 1))
	}
	if length > 1 {
		rest := make([]byte, length-1)
		if _, err := io.ReadFull(r, rest); err != nil {
			return 0, 0, err
		}
		for _, tail := range rest {
			value = value<<8 | uint64(tail)
		}
	}
	return value, length, nil
}

// isUnknownSize reports whether a decoded size VINT holds the reserved
// "unknown size" value: all data bits set (0x7F for 1 byte, 0x3FFF for 2,
// and so on — each extra byte adds 7 data bits).
func isUnknownSize(value uint64, length int) bool {
	if length < 1 || length > 8 {
		return false
	}
	allOnes := uint64(1)<<(7*uint(length)) - 1
	return value == allOnes
}
// ReadUint reads a big-endian unsigned integer element value of the given
// byte size (0-8). A size of 0 yields 0 without reading.
func ReadUint(r io.Reader, size int64) (uint64, error) {
	switch {
	case size < 0 || size > 8:
		return 0, fmt.Errorf("invalid uint size: %d", size)
	case size == 0:
		return 0, nil
	}
	raw := make([]byte, size)
	if _, err := io.ReadFull(r, raw); err != nil {
		return 0, err
	}
	var v uint64
	for _, b := range raw {
		v = v<<8 | uint64(b)
	}
	return v, nil
}

// ReadInt reads a big-endian signed integer element value of the given byte
// size, sign-extending the result to 64 bits.
func ReadInt(r io.Reader, size int64) (int64, error) {
	u, err := ReadUint(r, size)
	if err != nil {
		return 0, err
	}
	if size == 0 {
		return 0, nil
	}
	// Sign-extend when the top bit of the encoded value is set. For size 8
	// the shift count is 64, which Go defines to produce 0 — a no-op, as
	// the value already fills all 64 bits.
	if u>>(uint(size)*8-1) != 0 {
		u |= ^uint64(0) << (uint(size) * 8)
	}
	return int64(u), nil
}
// ReadString reads a string element value of the given size, trimming any
// trailing NUL padding bytes.
func ReadString(r io.Reader, size int64) (string, error) {
	if size < 0 {
		return "", fmt.Errorf("invalid string size: %d", size)
	}
	if size == 0 {
		return "", nil
	}
	raw := make([]byte, size)
	if _, err := io.ReadFull(r, raw); err != nil {
		return "", err
	}
	// Drop trailing NULs (EBML strings may be zero-padded).
	end := len(raw)
	for end > 0 && raw[end-1] == 0 {
		end--
	}
	return string(raw[:end]), nil
}
// ReadBinary reads exactly size bytes of opaque element data.
// A size of 0 returns a nil slice and no error.
func ReadBinary(r io.Reader, size int64) ([]byte, error) {
	switch {
	case size < 0:
		return nil, fmt.Errorf("invalid binary size: %d", size)
	case size == 0:
		return nil, nil
	}
	out := make([]byte, size)
	if _, err := io.ReadFull(r, out); err != nil {
		return nil, err
	}
	return out, nil
}
// SimpleBlockHeader contains the decoded fixed header of a SimpleBlock
// (a Block shares the same header layout).
type SimpleBlockHeader struct {
	TrackNumber uint64 // Track the block belongs to
	Timestamp   int16  // Signed offset relative to the cluster timestamp
	Flags       byte   // Keyframe, invisible, lacing, discardable bits
	HeaderSize  int    // Total header length in bytes
}

// Block flag bits.
const (
	FlagKeyframe    = 0x80
	FlagInvisible   = 0x08
	FlagLacing      = 0x06 // Mask for lacing type
	FlagDiscardable = 0x01
)

// Lacing types (values of Flags & FlagLacing).
const (
	LacingNone  = 0x00
	LacingXiph  = 0x02
	LacingFixed = 0x04
	LacingEBML  = 0x06
)

// ParseSimpleBlockHeader decodes the header of a SimpleBlock element. data
// must begin at the element's payload (just past the EBML ID and size).
func ParseSimpleBlockHeader(data []byte) (SimpleBlockHeader, error) {
	if len(data) < 4 {
		return SimpleBlockHeader{}, fmt.Errorf("SimpleBlock too short: %d bytes", len(data))
	}
	// Track number is a VINT with the marker stripped.
	trackNum, trackLen := parseVINTFromBytes(data)
	pos := trackLen
	// Need 2 timestamp bytes + 1 flags byte after the track number.
	if pos+3 > len(data) {
		return SimpleBlockHeader{}, fmt.Errorf("SimpleBlock header truncated")
	}
	hdr := SimpleBlockHeader{
		TrackNumber: trackNum,
		Timestamp:   int16(binary.BigEndian.Uint16(data[pos:])),
		Flags:       data[pos+2],
		HeaderSize:  pos + 3,
	}
	return hdr, nil
}

// parseVINTFromBytes decodes a VINT (marker stripped) from the start of data.
// Returns (0, 0) when data does not hold a valid 1-4 byte VINT.
func parseVINTFromBytes(data []byte) (uint64, int) {
	if len(data) == 0 || data[0] == 0 {
		return 0, 0
	}
	lead := data[0]
	length := 1
	marker := byte(0x80)
	for lead&marker == 0 {
		marker >>= 1
		length++
		if length > 4 {
			// Track numbers longer than 4 bytes are not supported here.
			return 0, 0
		}
	}
	if len(data) < length {
		return 0, 0
	}
	value := uint64(lead & (marker - 1))
	for _, b := range data[1:length] {
		value = value<<8 | uint64(b)
	}
	return value, length
}

// IsKeyframe reports whether the keyframe flag bit is set.
func (h SimpleBlockHeader) IsKeyframe() bool {
	return h.Flags&FlagKeyframe != 0
}

// LacingType returns the block's lacing mode (one of the Lacing* constants).
func (h SimpleBlockHeader) LacingType() byte {
	return h.Flags & FlagLacing
}
package mkv
import (
"bytes"
"fmt"
"io"
"os"
"github.com/stuckj/mkvdup/internal/mmap"
)
// Packet represents a codec data packet extracted from an MKV file.
// Offset/Size describe the packet payload only — the SimpleBlock/Block
// header (track number, timestamp, flags) has already been skipped.
type Packet struct {
	Offset    int64  // Offset in the MKV file where packet data starts
	Size      int64  // Size of packet data
	TrackNum  uint64 // Track number this packet belongs to
	Timestamp int64  // Absolute timestamp (cluster + block relative)
	Keyframe  bool   // Whether this is a keyframe
}
// Track represents an MKV track (video, audio, etc).
// Type holds one of the TrackType* constants; CodecID is the Matroska codec
// identifier string (e.g. from the CodecID element).
type Track struct {
	Number       uint64
	UID          uint64
	Type         int
	CodecID      string
	CodecPrivate []byte // Codec-specific init data (zero-copy slice into mmap'd data)
}
// Parser parses MKV files to extract codec packets.
// The file is memory-mapped, so tracks' CodecPrivate slices and all parsing
// reads reference the mapping directly; it stays valid until Close.
type Parser struct {
	path     string
	mmapFile *mmap.File
	data     []byte // Zero-copy mmap'd data
	size     int64
	tracks   []Track
	packets  []Packet
}
// NewParser creates an MKV parser that memory-maps the given file for
// zero-copy access. Callers must Close the parser to release the mapping.
func NewParser(path string) (*Parser, error) {
	st, err := os.Stat(path)
	if err != nil {
		return nil, fmt.Errorf("stat file: %w", err)
	}
	mf, err := mmap.Open(path)
	if err != nil {
		return nil, fmt.Errorf("mmap file: %w", err)
	}
	p := &Parser{
		path:     path,
		mmapFile: mf,
		data:     mf.Data(),
		size:     st.Size(),
	}
	return p, nil
}
// Close unmaps the file and releases resources used by the parser.
func (p *Parser) Close() error {
	if p.mmapFile == nil {
		return nil
	}
	return p.mmapFile.Close()
}
// Size returns the file size in bytes (captured at NewParser time).
func (p *Parser) Size() int64 {
	return p.size
}

// ProgressFunc is called to report parsing progress: bytes processed so far
// and the total file size.
type ProgressFunc func(processed, total int64)
// Parse parses the MKV file, populating tracks and extracting all codec
// packets. If progress is non-nil it is invoked roughly once per MiB of
// input and once more at completion.
func (p *Parser) Parse(progress ProgressFunc) error {
	offset := int64(0)
	// Parse EBML header
	elem, err := p.readElementAt(offset)
	if err != nil {
		return fmt.Errorf("read EBML header: %w", err)
	}
	if elem.ID != IDEBMLHeader {
		return fmt.Errorf("expected EBML header, got 0x%X", elem.ID)
	}
	offset = elem.DataOffset + elem.Size
	// Parse Segment
	elem, err = p.readElementAt(offset)
	if err != nil {
		return fmt.Errorf("read Segment: %w", err)
	}
	if elem.ID != IDSegment {
		return fmt.Errorf("expected Segment, got 0x%X", elem.ID)
	}
	segmentDataStart := elem.DataOffset
	segmentEnd := elem.DataOffset + elem.Size
	if elem.Size < 0 {
		// Unknown segment size: scan to end of file.
		segmentEnd = p.size
	}
	// Parse segment contents
	offset = segmentDataStart
	var clusterTimestamp int64
	// FIX: report progress whenever we cross the next MiB threshold. The
	// previous check (offset%(1024*1024) == 0) fired only when an element
	// happened to start exactly on a MiB boundary — essentially never for
	// arbitrary element offsets — so the callback was effectively silent
	// until completion.
	const progressInterval = int64(1024 * 1024)
	nextProgress := progressInterval
	for offset < segmentEnd {
		if progress != nil && offset >= nextProgress {
			progress(offset, p.size)
			nextProgress = offset + progressInterval
		}
		elem, err = p.readElementAt(offset)
		if err != nil {
			if err == io.EOF {
				break
			}
			return fmt.Errorf("read element at %d: %w", offset, err)
		}
		switch elem.ID {
		case IDTracks:
			if err := p.parseTracks(elem); err != nil {
				return fmt.Errorf("parse tracks: %w", err)
			}
		case IDCluster:
			if err := p.parseCluster(elem, &clusterTimestamp); err != nil {
				return fmt.Errorf("parse cluster at %d: %w", offset, err)
			}
		}
		// Move to next element
		if elem.Size < 0 {
			// Unknown size - need to scan for next element
			// For now, we'll just move past the header
			offset = elem.DataOffset
		} else {
			offset = elem.DataOffset + elem.Size
		}
	}
	// Final report so callers always see 100%.
	if progress != nil {
		progress(p.size, p.size)
	}
	return nil
}
// readElementAt decodes the EBML element header starting at offset, or
// returns io.EOF when offset is at or beyond the end of the file.
func (p *Parser) readElementAt(offset int64) (Element, error) {
	if offset >= p.size {
		return Element{}, io.EOF
	}
	// Zero-copy: wrap the mmap'd slice in a reader; no data is copied.
	return ReadElementHeader(bytes.NewReader(p.data[offset:]), offset)
}
// parseTracks walks the children of a Tracks element and records every
// TrackEntry found; other child elements are skipped.
func (p *Parser) parseTracks(tracksElem Element) error {
	end := tracksElem.DataOffset + tracksElem.Size
	for pos := tracksElem.DataOffset; pos < end; {
		child, err := p.readElementAt(pos)
		if err != nil {
			return err
		}
		if child.ID == IDTrackEntry {
			tr, err := p.parseTrackEntry(child)
			if err != nil {
				return fmt.Errorf("parse track entry: %w", err)
			}
			p.tracks = append(p.tracks, tr)
		}
		pos = child.DataOffset + child.Size
	}
	return nil
}
// parseTrackEntry decodes a single TrackEntry element into a Track,
// extracting the number, UID, type, codec ID, and codec private data.
func (p *Parser) parseTrackEntry(trackElem Element) (Track, error) {
	var tr Track
	end := trackElem.DataOffset + trackElem.Size
	for pos := trackElem.DataOffset; pos < end; {
		child, err := p.readElementAt(pos)
		if err != nil {
			return tr, err
		}
		// Zero-copy: the reader wraps the mmap'd slice directly.
		payload := bytes.NewReader(p.data[child.DataOffset : child.DataOffset+child.Size])
		switch child.ID {
		case IDTrackNum:
			tr.Number, _ = ReadUint(payload, child.Size)
		case IDTrackUID:
			tr.UID, _ = ReadUint(payload, child.Size)
		case IDTrackType:
			v, _ := ReadUint(payload, child.Size)
			tr.Type = int(v)
		case IDCodecID:
			tr.CodecID, _ = ReadString(payload, child.Size)
		case IDCodecPrivate:
			// Zero-copy: slice straight into the mmap'd data.
			tr.CodecPrivate = p.data[child.DataOffset : child.DataOffset+child.Size]
		}
		pos = child.DataOffset + child.Size
	}
	return tr, nil
}
// parseCluster parses a Cluster element and extracts packets.
// clusterTimestamp carries the cluster's base timestamp: it is updated when
// an IDTimestamp child is seen and added to each block's relative timestamp.
func (p *Parser) parseCluster(clusterElem Element, clusterTimestamp *int64) error {
	offset := clusterElem.DataOffset
	end := clusterElem.DataOffset + clusterElem.Size
	if clusterElem.Size < 0 {
		// Unknown size - parse until we hit another top-level element
		end = p.size
	}
	for offset < end {
		elem, err := p.readElementAt(offset)
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}
		// Check if we've hit a top-level element (end of cluster with unknown size)
		if isTopLevelElement(elem.ID) && clusterElem.Size < 0 {
			break
		}
		switch elem.ID {
		case IDTimestamp:
			// Zero-copy: create a bytes.Reader over the slice
			r := bytes.NewReader(p.data[elem.DataOffset : elem.DataOffset+elem.Size])
			ts, _ := ReadUint(r, elem.Size)
			*clusterTimestamp = int64(ts)
		case IDSimpleBlock:
			if err := p.parseSimpleBlock(elem, *clusterTimestamp); err != nil {
				return fmt.Errorf("parse SimpleBlock: %w", err)
			}
		case IDBlockGroup:
			if err := p.parseBlockGroup(elem, *clusterTimestamp); err != nil {
				return fmt.Errorf("parse BlockGroup: %w", err)
			}
		}
		offset = elem.DataOffset + elem.Size
	}
	return nil
}
// parseSimpleBlock parses a SimpleBlock element and appends one packet for it.
//
// Fix: the original branched on header.LacingType() but both branches
// appended a byte-identical Packet, so the duplicate branch is collapsed.
// Laced data is still treated as a single packet (individual frames are
// not split out); this matches the original behavior exactly, assuming
// LacingType() is a pure accessor on the parsed header — TODO confirm.
func (p *Parser) parseSimpleBlock(elem Element, clusterTimestamp int64) error {
	// Zero-copy: read header bytes directly from mmap'd data.
	readSize := elem.Size
	if readSize > 16 {
		readSize = 16 // More than enough for header
	}
	endOffset := elem.DataOffset + readSize
	if endOffset > p.size {
		endOffset = p.size
	}
	headerBuf := p.data[elem.DataOffset:endOffset]
	if len(headerBuf) < 4 {
		return fmt.Errorf("read SimpleBlock header: data too short")
	}
	header, err := ParseSimpleBlockHeader(headerBuf)
	if err != nil {
		return err
	}
	// The packet data follows the header. Whether or not lacing is present,
	// the entire payload is recorded as one packet.
	p.packets = append(p.packets, Packet{
		Offset:    elem.DataOffset + int64(header.HeaderSize),
		Size:      elem.Size - int64(header.HeaderSize),
		TrackNum:  header.TrackNumber,
		Timestamp: clusterTimestamp + int64(header.Timestamp),
		Keyframe:  header.IsKeyframe(),
	})
	return nil
}
// parseBlockGroup parses a BlockGroup element and appends a packet for each
// Block child it contains. Non-Block children are skipped.
func (p *Parser) parseBlockGroup(groupElem Element, clusterTimestamp int64) error {
	cur := groupElem.DataOffset
	stop := groupElem.DataOffset + groupElem.Size
	for cur < stop {
		child, err := p.readElementAt(cur)
		if err != nil {
			return err
		}
		if child.ID != IDBlock {
			cur = child.DataOffset + child.Size
			continue
		}
		// Block shares the SimpleBlock header layout.
		// Zero-copy: read header bytes straight from the mmap'd data.
		hdrLen := child.Size
		if hdrLen > 16 {
			hdrLen = 16
		}
		hdrEnd := child.DataOffset + hdrLen
		if hdrEnd > p.size {
			hdrEnd = p.size
		}
		hdr := p.data[child.DataOffset:hdrEnd]
		if len(hdr) < 4 {
			return fmt.Errorf("read Block header: data too short")
		}
		header, err := ParseSimpleBlockHeader(hdr)
		if err != nil {
			return err
		}
		p.packets = append(p.packets, Packet{
			Offset:    child.DataOffset + int64(header.HeaderSize),
			Size:      child.Size - int64(header.HeaderSize),
			TrackNum:  header.TrackNumber,
			Timestamp: clusterTimestamp + int64(header.Timestamp),
			// Block doesn't have keyframe flag, would need ReferenceBlock.
			Keyframe: false,
		})
		cur = child.DataOffset + child.Size
	}
	return nil
}
// isTopLevelElement reports whether id is a direct child of the Segment
// (a top-level element); used to detect the end of unknown-size clusters.
func isTopLevelElement(id uint64) bool {
	switch id {
	case IDSeekHead, IDInfo, IDTracks, IDChapters, IDCluster, IDCues, IDTags:
		return true
	default:
		return false
	}
}
// Packets returns all parsed packets.
// The returned slice is the parser's internal storage, not a copy.
func (p *Parser) Packets() []Packet {
	return p.packets
}
// ParseTracksOnly parses only the track headers from the MKV file.
// This is much faster than Parse() since it stops as soon as the Tracks
// element is found, without scanning through clusters/packets.
func (p *Parser) ParseTracksOnly() error {
	// The file must open with an EBML header.
	hdr, err := p.readElementAt(0)
	if err != nil {
		return fmt.Errorf("read EBML header: %w", err)
	}
	if hdr.ID != IDEBMLHeader {
		return fmt.Errorf("expected EBML header, got 0x%X", hdr.ID)
	}
	// Immediately after it comes the Segment.
	seg, err := p.readElementAt(hdr.DataOffset + hdr.Size)
	if err != nil {
		return fmt.Errorf("read Segment: %w", err)
	}
	if seg.ID != IDSegment {
		return fmt.Errorf("expected Segment, got 0x%X", seg.ID)
	}
	segEnd := seg.DataOffset + seg.Size
	if seg.Size < 0 {
		// Unknown-size segment: scan through to the end of the file.
		segEnd = p.size
	}
	// Walk the segment's children until Tracks shows up.
	for cur := seg.DataOffset; cur < segEnd; {
		child, err := p.readElementAt(cur)
		if err != nil {
			return fmt.Errorf("read element at %d: %w", cur, err)
		}
		if child.ID == IDTracks {
			if err := p.parseTracks(child); err != nil {
				return fmt.Errorf("parse tracks: %w", err)
			}
			return nil
		}
		if child.Size < 0 {
			// We cannot skip past an element whose size is unknown.
			return fmt.Errorf("unsupported unknown-size element 0x%X before Tracks", child.ID)
		}
		cur = child.DataOffset + child.Size
	}
	return fmt.Errorf("no Tracks element found")
}
// Tracks returns all parsed tracks.
// The returned slice is the parser's internal storage, not a copy.
func (p *Parser) Tracks() []Track {
	return p.tracks
}
// PacketCount returns the number of packets parsed, across all tracks.
func (p *Parser) PacketCount() int {
	return len(p.packets)
}
// VideoPacketCount returns the number of video packets.
func (p *Parser) VideoPacketCount() int {
	// First collect the set of track numbers that carry video.
	isVideo := make(map[uint64]bool)
	for _, t := range p.tracks {
		if t.Type == TrackTypeVideo {
			isVideo[t.Number] = true
		}
	}
	// Then tally the packets belonging to one of those tracks.
	n := 0
	for _, pkt := range p.packets {
		if isVideo[pkt.TrackNum] {
			n++
		}
	}
	return n
}
// AudioPacketCount returns the number of audio packets.
func (p *Parser) AudioPacketCount() int {
	// First collect the set of track numbers that carry audio.
	isAudio := make(map[uint64]bool)
	for _, t := range p.tracks {
		if t.Type == TrackTypeAudio {
			isAudio[t.Number] = true
		}
	}
	// Then tally the packets belonging to one of those tracks.
	n := 0
	for _, pkt := range p.packets {
		if isAudio[pkt.TrackNum] {
			n++
		}
	}
	return n
}
// ReadPacketData reads the data for a packet.
// Returns a slice into the mmap'd data (zero-copy).
// The returned slice is valid until Close() is called.
//
// Fix: the original only rejected pkt.Offset >= p.size; a negative Offset
// or Size (e.g. from a corrupt block header) panicked on the slice
// expression. Both are now rejected with the same error.
func (p *Parser) ReadPacketData(pkt Packet) ([]byte, error) {
	if pkt.Offset < 0 || pkt.Size < 0 || pkt.Offset >= p.size {
		return nil, fmt.Errorf("read packet data: offset out of range")
	}
	end := pkt.Offset + pkt.Size
	if end > p.size {
		// Clamp packets truncated at EOF to the mapped region.
		end = p.size
	}
	// Zero-copy: return slice directly into mmap'd data
	return p.data[pkt.Offset:end], nil
}
// Data returns the raw mmap'd file data for zero-copy access.
// The returned slice is valid until Close() is called.
// NOTE(review): the backing mapping appears to be read-only (mmap.Open uses
// PROT_READ); do not write through this slice — confirm at the call sites.
func (p *Parser) Data() []byte {
	return p.data
}
// Package mmap provides zero-copy memory-mapped file access.
package mmap
import (
"fmt"
"io"
"os"
"golang.org/x/sys/unix"
)
// SourceFile provides read access to a source file, either via mmap or pread.
type SourceFile interface {
	io.ReaderAt
	// Size returns the file size in bytes.
	Size() int64
	// Close releases the underlying resources (mapping or fd).
	Close() error
}
// MmapData provides zero-copy access to a memory-mapped file's data.
// Types implementing this interface allow callers to use direct slice access
// instead of copying through ReadAt.
type MmapData interface {
	// Data returns the raw mapped bytes; valid until the mapping is closed.
	Data() []byte
}
// File provides zero-copy access to a memory-mapped file.
// Unlike golang.org/x/exp/mmap, this exposes the raw []byte slice
// allowing direct access without copying data.
type File struct {
	data []byte // the mmap'd region; nil for empty files and after Close
	size int64  // length of the mapping in bytes
}
// Open opens a file and memory-maps it for reading.
// The returned File provides zero-copy access to the file contents.
func Open(path string) (*File, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open file: %w", err)
	}
	// The fd is only needed to establish the mapping; the mapping itself
	// remains valid after the descriptor is closed.
	defer f.Close()

	info, err := f.Stat()
	if err != nil {
		return nil, fmt.Errorf("stat file: %w", err)
	}

	size := info.Size()
	if size == 0 {
		// A zero-length mapping is not possible; represent an empty file
		// with a nil data slice.
		return &File{data: nil, size: 0}, nil
	}

	data, err := unix.Mmap(int(f.Fd()), 0, int(size), unix.PROT_READ, unix.MAP_SHARED)
	if err != nil {
		return nil, fmt.Errorf("mmap: %w", err)
	}
	return &File{data: data, size: size}, nil
}
// Data returns the raw byte slice for direct zero-copy access.
// The slice is valid until Close() is called; it is nil for an empty
// file and after Close.
func (m *File) Data() []byte {
	return m.data
}
// Size returns the size of the mapped file in bytes.
// Returns 0 after Close.
func (m *File) Size() int64 {
	return m.size
}
// Len returns the size of the mapped file as int (for compatibility).
// NOTE(review): this truncates on 32-bit platforms for files larger
// than the int range.
func (m *File) Len() int {
	return int(m.size)
}
// Slice returns a sub-slice of the mapped data without copying.
// Returns nil if the range is out of bounds.
//
// Fix: the original accepted a negative size, which made end < offset and
// panicked on the slice expression; a negative size now returns nil like
// any other out-of-bounds request, as the doc comment promises.
func (m *File) Slice(offset int64, size int) []byte {
	if offset < 0 || offset >= m.size || size < 0 {
		return nil
	}
	end := offset + int64(size)
	if end > m.size {
		// Clamp requests that run past the end of the mapping.
		end = m.size
	}
	return m.data[offset:end]
}
// Advise provides hints to the kernel about expected access patterns.
// Use MADV_DONTNEED to release pages (they'll be re-faulted when accessed).
// Use MADV_SEQUENTIAL to hint sequential access pattern.
// It is a no-op (returns nil) when nothing is mapped.
func (m *File) Advise(advice int) error {
	if len(m.data) == 0 {
		return nil
	}
	return unix.Madvise(m.data, advice)
}
// ReadAt implements io.ReaderAt by copying from the mmap'd data.
func (m *File) ReadAt(p []byte, off int64) (int, error) {
	// An empty read succeeds regardless of the offset.
	if len(p) == 0 {
		return 0, nil
	}
	switch {
	case off < 0:
		return 0, os.ErrInvalid
	case off >= m.size:
		return 0, io.EOF
	}
	n := copy(p, m.data[off:])
	if n < len(p) {
		// Short read at the end of the mapping.
		return n, io.EOF
	}
	return n, nil
}
// Close unmaps the file from memory.
// It is safe to call multiple times; subsequent calls are no-ops.
// Any slices previously returned by Data or Slice become invalid.
func (m *File) Close() error {
	if m.data == nil {
		return nil
	}
	if err := unix.Munmap(m.data); err != nil {
		return err
	}
	m.data = nil
	m.size = 0
	return nil
}
package mmap
import (
"errors"
"fmt"
"io"
"os"
"sync"
"time"
"golang.org/x/sys/unix"
)
// ReadTimeoutError is returned when a pread operation exceeds the configured timeout.
type ReadTimeoutError struct {
	Path    string        // file being read
	Timeout time.Duration // the per-read deadline that was exceeded
}

// Error implements the error interface.
func (e *ReadTimeoutError) Error() string {
	return fmt.Sprintf("pread timeout after %s: %s", e.Timeout, e.Path)
}
// ReadBackpressureError is returned when all inflight read slots are occupied,
// indicating the network FS is likely stalled. This is distinct from
// ReadTimeoutError, which indicates a single read exceeded its deadline.
type ReadBackpressureError struct {
	Path string // file whose read was rejected
}

// Error implements the error interface.
func (e *ReadBackpressureError) Error() string {
	return fmt.Sprintf("pread backpressure: all %d inflight slots occupied: %s", maxInflight, e.Path)
}
// maxInflight is the maximum number of concurrent in-flight read goroutines
// per PreadFile. This bounds memory/goroutine accumulation when an NFS mount
// is stalled and reads are timing out repeatedly. When every slot is taken,
// ReadAt fails fast with ReadBackpressureError rather than spawning more.
const maxInflight = 16
// PreadFile provides pread(2)-based read access to a source file, with retry
// and stale handle recovery. This is used for source files on network
// filesystems (NFS, CIFS/SMB) where mmap is unsafe due to SIGBUS on
// page fault failures.
type PreadFile struct {
	mu         sync.Mutex    // protects file and staleFiles
	file       *os.File      // current fd; nil after Close
	path       string        // original path, used for reopen and error messages
	size       int64         // file size captured at open time
	timeout    time.Duration // 0 = no timeout
	inflight   chan struct{} // semaphore bounding concurrent timeout goroutines
	staleFiles []*os.File    // old fds kept open until Close to avoid EBADF on in-flight reads
}
// OpenPread opens a file for pread-based access.
// timeout bounds each ReadAt call; pass 0 to disable the deadline.
func OpenPread(path string, timeout time.Duration) (*PreadFile, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open file: %w", err)
	}
	info, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, fmt.Errorf("stat file: %w", err)
	}
	pf := &PreadFile{
		file:     f,
		path:     path,
		size:     info.Size(),
		timeout:  timeout,
		inflight: make(chan struct{}, maxInflight),
	}
	return pf, nil
}
// Size returns the size of the file, as captured at open time.
func (p *PreadFile) Size() int64 {
	return p.size
}
// ReadAt reads len(buf) bytes from the file starting at byte offset off.
// If timeout is configured and the read takes too long, it returns a
// ReadTimeoutError. The underlying goroutine may continue until the kernel
// completes the I/O, but the caller is unblocked. The goroutine reads into
// a private buffer to prevent it from writing to buf after the caller has
// moved on. A per-file semaphore bounds the number of in-flight goroutines
// to prevent unbounded accumulation under a stalled NFS mount.
func (p *PreadFile) ReadAt(buf []byte, off int64) (int, error) {
	if len(buf) == 0 {
		return 0, nil
	}
	if p.timeout <= 0 {
		// No deadline configured: read synchronously on the caller's goroutine.
		return p.readAtWithRetry(buf, off)
	}
	// Acquire an inflight slot (non-blocking). If all slots are occupied
	// the NFS mount is likely stalled — fail fast instead of spawning
	// more goroutines.
	select {
	case p.inflight <- struct{}{}:
	default:
		return 0, &ReadBackpressureError{Path: p.path}
	}
	type result struct {
		n   int
		err error
	}
	// Read into a private buffer so an abandoned goroutine (after timeout)
	// cannot write into buf while it is being reused by the caller.
	tmp := make([]byte, len(buf))
	// Buffered so the goroutine can deliver its result and exit even when
	// the caller has already timed out and stopped listening.
	ch := make(chan result, 1)
	go func() {
		defer func() { <-p.inflight }() // release the inflight slot
		n, err := p.readAtWithRetry(tmp, off)
		ch <- result{n, err}
	}()
	timer := time.NewTimer(p.timeout)
	defer timer.Stop()
	select {
	case r := <-ch:
		// Completed in time: copy only the bytes actually read.
		copy(buf[:r.n], tmp[:r.n])
		return r.n, r.err
	case <-timer.C:
		return 0, &ReadTimeoutError{Path: p.path, Timeout: p.timeout}
	}
}
// readAtWithRetry performs a pread with one retry on retryable errors,
// reopening the file descriptor if needed. The mutex is only held briefly
// to copy the fd pointer — not during the pread syscall — so Close() and
// reopen() are never blocked by a stalled network read. Old fds from
// reopen are kept in staleFiles (not closed) to avoid EBADF on
// concurrent in-flight reads; they are cleaned up on Close().
func (p *PreadFile) readAtWithRetry(buf []byte, off int64) (int, error) {
	p.mu.Lock()
	f := p.file
	p.mu.Unlock()
	if f == nil {
		return 0, os.ErrClosed
	}
	n, err := f.ReadAt(buf, off)
	// io.EOF is a normal short-read signal, never a reason to retry.
	if err != nil && err != io.EOF && isRetryableError(err) {
		if reopenErr := p.reopen(); reopenErr != nil {
			return n, fmt.Errorf("pread retry failed (reopen: %w, original: %w)", reopenErr, err)
		}
		// Re-fetch the fd: reopen just swapped in a fresh one.
		p.mu.Lock()
		f = p.file
		p.mu.Unlock()
		if f == nil {
			return 0, os.ErrClosed
		}
		n, err = f.ReadAt(buf, off)
	}
	return n, err
}
// reopen opens a new fd and swaps it in. The old fd is not closed
// immediately because in-flight goroutines may still hold a reference
// to it (copied under the mutex before the pread syscall). Old fds are
// collected in staleFiles and cleaned up on Close().
//
// Fd accumulation is bounded in practice: reopens only occur on transient
// network errors (ESTALE, ETIMEDOUT, etc.), which are rare. Even under
// a flaky mount, each reopen adds just one fd, well within default ulimits.
func (p *PreadFile) reopen() error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.file == nil {
		// Already closed; do not resurrect the file.
		return os.ErrClosed
	}
	newFile, err := os.Open(p.path)
	if err != nil {
		return fmt.Errorf("reopen: %w", err)
	}
	info, err := newFile.Stat()
	if err != nil {
		newFile.Close()
		return fmt.Errorf("reopen stat: %w", err)
	}
	// A size change means the file was replaced; previously computed
	// offsets would be unsafe, so refuse the new fd.
	if info.Size() != p.size {
		newFile.Close()
		return fmt.Errorf("reopen: size changed (%d → %d)", p.size, info.Size())
	}
	p.staleFiles = append(p.staleFiles, p.file)
	p.file = newFile
	return nil
}
// Close closes the current file and any stale fds from previous reopens.
// Every fd is closed regardless of errors; the first error seen is returned.
func (p *PreadFile) Close() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	var firstErr error
	record := func(err error) {
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}
	if p.file != nil {
		record(p.file.Close())
		p.file = nil
	}
	for _, stale := range p.staleFiles {
		record(stale.Close())
	}
	p.staleFiles = nil
	return firstErr
}
// isRetryableError checks if an error is a transient network FS error
// that may succeed on retry (possibly after reopening the fd).
func isRetryableError(err error) bool {
	var errno unix.Errno
	if !errors.As(err, &errno) {
		return false
	}
	return errno == unix.ESTALE || errno == unix.ETIMEDOUT ||
		errno == unix.ECONNRESET || errno == unix.EIO
}
// Package security provides file ownership and path confinement checks
// for FUSE mounts running as root.
package security
import (
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
)
// fileStatFunc is a package-level var for os.Stat, allowing test injection.
// All ownership checks in this package stat through it.
var fileStatFunc = os.Stat

// Geteuid returns the effective user ID. Exported for testing; the checks
// in this package are enforced only when it reports 0 (root).
var Geteuid = os.Geteuid
// CheckFileOwnership validates that a file is root-owned and not
// group-writable or world-writable. Returns nil if safe, or an error
// describing the violation. Only checks when running as root (euid == 0).
// The path is resolved via EvalSymlinks before checking.
func CheckFileOwnership(path string) error {
	if Geteuid() == 0 {
		resolved, err := filepath.EvalSymlinks(path)
		if err != nil {
			return fmt.Errorf("resolve %s: %w", path, err)
		}
		return checkOwnership(resolved)
	}
	// Non-root: ownership checks are not enforced.
	return nil
}
// CheckFileOwnershipResolved is like CheckFileOwnership but skips symlink
// resolution, assuming the caller already canonicalized the path.
// Only checks when running as root (euid == 0).
func CheckFileOwnershipResolved(path string) error {
	if Geteuid() == 0 {
		return checkOwnership(path)
	}
	// Non-root: ownership checks are not enforced.
	return nil
}
// checkOwnership performs the actual ownership and permission checks on
// an already-resolved path: the file must be uid-0 owned and writable by
// neither group nor world.
func checkOwnership(path string) error {
	info, err := fileStatFunc(path)
	if err != nil {
		return fmt.Errorf("stat %s: %w", path, err)
	}
	st, ok := info.Sys().(*syscall.Stat_t)
	if !ok {
		// Non-unix FileInfo (or a test double without Stat_t).
		return fmt.Errorf("cannot get ownership info for %s", path)
	}
	mode := info.Mode()
	switch {
	case st.Uid != 0:
		return fmt.Errorf("security: %s is owned by uid %d, not root", path, st.Uid)
	case mode&0020 != 0:
		return fmt.Errorf("security: %s is group-writable (%04o)", path, mode.Perm())
	case mode&0002 != 0:
		return fmt.Errorf("security: %s is world-writable (%04o)", path, mode.Perm())
	}
	return nil
}
// CheckPathConfinement resolves sourceDir + relPath, canonicalizes via
// EvalSymlinks, and verifies the result stays within sourceDir. Returns
// the canonical path or an error. Only checks when running as root.
//
// When not running as root, returns the simple joined path without
// canonicalization (preserving existing behavior).
//
// NOTE(review): EvalSymlinks fails for nonexistent paths, so a missing
// source file surfaces here as a resolve error rather than later as ENOENT.
func CheckPathConfinement(sourceDir, relPath string) (string, error) {
	// Reject absolute paths regardless of euid — filepath.Join would
	// silently drop sourceDir for absolute relPath, allowing escape.
	if filepath.IsAbs(relPath) {
		return "", fmt.Errorf("security: absolute source path %q not allowed", relPath)
	}
	if Geteuid() != 0 {
		// Non-root: return cleaned join without canonicalization.
		// Absolute relPath is already rejected above, so Join always
		// prepends sourceDir. Note that Join cleans ".." components,
		// but confinement is not enforced in non-root mode.
		return filepath.Join(sourceDir, relPath), nil
	}
	// Canonicalize sourceDir
	canonicalDir, err := filepath.EvalSymlinks(sourceDir)
	if err != nil {
		return "", fmt.Errorf("security: resolve source dir %s: %w", sourceDir, err)
	}
	// Canonicalize the full path
	joined := filepath.Join(sourceDir, relPath)
	canonical, err := filepath.EvalSymlinks(joined)
	if err != nil {
		return "", fmt.Errorf("security: resolve source path %s: %w", joined, err)
	}
	// Use trailing separator to prevent prefix attacks
	// (e.g., /data/source-evil matching /data/source)
	if !strings.HasPrefix(canonical+"/", canonicalDir+"/") {
		return "", fmt.Errorf("security: source path %s escapes source dir %s (resolved to %s)", relPath, sourceDir, canonical)
	}
	return canonical, nil
}
// CheckDirectory validates that a path is a directory, is root-owned,
// and is not group-writable or world-writable. Returns nil if safe.
// Only checks when running as root (euid == 0).
// The path is resolved via EvalSymlinks before checking.
func CheckDirectory(dir string) error {
	if Geteuid() == 0 {
		resolved, err := filepath.EvalSymlinks(dir)
		if err != nil {
			return fmt.Errorf("resolve %s: %w", dir, err)
		}
		return checkDirectory(resolved)
	}
	// Non-root: directory checks are not enforced.
	return nil
}
// CheckDirectoryResolved is like CheckDirectory but skips symlink
// resolution, assuming the caller already canonicalized the path.
// Only checks when running as root (euid == 0).
func CheckDirectoryResolved(dir string) error {
	if Geteuid() == 0 {
		return checkDirectory(dir)
	}
	// Non-root: directory checks are not enforced.
	return nil
}
// checkDirectory performs ownership and directory checks on an
// already-resolved path.
func checkDirectory(dir string) error {
	// Ownership and permission bits first, matching file checks.
	if err := checkOwnership(dir); err != nil {
		return err
	}
	info, err := fileStatFunc(dir)
	if err != nil {
		return fmt.Errorf("stat %s: %w", dir, err)
	}
	if info.IsDir() {
		return nil
	}
	return fmt.Errorf("security: %s is not a directory", dir)
}
package source
// FindAudioSyncPoints finds all audio sync pattern positions in the data.
// Detects AC3, DTS, TrueHD, and MPEG Audio sync patterns.
// Returns offsets where sync patterns begin.
func FindAudioSyncPoints(data []byte) []int {
	n := len(data)
	if n < 2 {
		return nil
	}
	var hits []int
	for i := 0; i <= n-2; i++ {
		b0, b1 := data[i], data[i+1]
		// AC3/E-AC3 sync word: 0B 77.
		if b0 == 0x0B && b1 == 0x77 {
			hits = append(hits, i)
			continue
		}
		// DTS/DTS-HD sync word: 7F FE 80 01.
		if i+4 <= n && b0 == 0x7F && b1 == 0xFE && data[i+2] == 0x80 && data[i+3] == 0x01 {
			hits = append(hits, i)
			continue
		}
		// TrueHD sync word: F8 72 6F BA.
		if i+4 <= n && b0 == 0xF8 && b1 == 0x72 && data[i+2] == 0x6F && data[i+3] == 0xBA {
			hits = append(hits, i)
			continue
		}
		// MPEG Audio / AAC ADTS: 0xFF followed by 0xFx (an 11-bit sync
		// word). The third byte's upper nibble must not be 0xF — bitrate
		// index 1111 is reserved/invalid, and requiring this rejects the
		// massive false positives a run of 0xFF padding bytes (as found in
		// MPEG-TS adaptation fields) would otherwise produce.
		if i+3 <= n && b0 == 0xFF && b1&0xF0 == 0xF0 && data[i+2]&0xF0 != 0xF0 {
			hits = append(hits, i)
		}
	}
	return hits
}
// FindAudioSyncPointsInRange finds audio sync points within a specific range of data.
// This is useful for processing large files in chunks: each reported offset is
// the match position within data plus startOffset.
func FindAudioSyncPointsInRange(data []byte, startOffset int) []int {
	n := len(data)
	if n < 2 {
		return nil
	}
	var hits []int
	emit := func(i int) { hits = append(hits, startOffset+i) }
	for i := 0; i <= n-2; i++ {
		b0, b1 := data[i], data[i+1]
		switch {
		// AC3/E-AC3 sync word: 0B 77.
		case b0 == 0x0B && b1 == 0x77:
			emit(i)
		// DTS/DTS-HD sync word: 7F FE 80 01.
		case i+4 <= n && b0 == 0x7F && b1 == 0xFE && data[i+2] == 0x80 && data[i+3] == 0x01:
			emit(i)
		// TrueHD sync word: F8 72 6F BA.
		case i+4 <= n && b0 == 0xF8 && b1 == 0x72 && data[i+2] == 0x6F && data[i+3] == 0xBA:
			emit(i)
		// MPEG Audio / AAC ADTS: FF Fx with a valid (non-1111) bitrate index.
		case i+3 <= n && b0 == 0xFF && b1&0xF0 == 0xF0 && data[i+2]&0xF0 != 0xF0:
			emit(i)
		}
	}
	return hits
}
// AC3FrameSize returns the frame size in bytes for an AC3 sync frame given
// the fscod (sample rate code, 2 bits) and frmsizecod (frame size code, 6 bits)
// from byte 4 of the sync frame. Returns 0 if the codes are invalid.
// Based on ATSC A/52 Table 5.18.
func AC3FrameSize(fscod, frmsizecod byte) int {
	// Valid ranges: fscod 0-2 (48/44.1/32 kHz), frmsizecod 0-37.
	if fscod > 2 || frmsizecod > 37 {
		return 0
	}
	// Frame sizes in 16-bit words, indexed by [fscod][frmsizecod]
	words := [3][38]int{
		// 48 kHz
		{64, 64, 80, 80, 96, 96, 112, 112, 128, 128, 160, 160, 192, 192, 224, 224, 256, 256, 320, 320, 384, 384, 448, 448, 512, 512, 640, 640, 768, 768, 896, 896, 1024, 1024, 1152, 1152, 1280, 1280},
		// 44.1 kHz
		{69, 70, 87, 88, 104, 105, 121, 122, 139, 140, 174, 175, 208, 209, 243, 244, 278, 279, 348, 349, 417, 418, 487, 488, 557, 558, 696, 697, 835, 836, 975, 976, 1114, 1115, 1253, 1254, 1393, 1394},
		// 32 kHz
		{96, 96, 120, 120, 144, 144, 168, 168, 192, 192, 240, 240, 288, 288, 336, 336, 384, 384, 480, 480, 576, 576, 672, 672, 768, 768, 960, 960, 1152, 1152, 1344, 1344, 1536, 1536, 1728, 1728, 1920, 1920},
	}
	// Table entries are 16-bit words, so the byte size is double.
	return words[fscod][frmsizecod] * 2
}
// DTSCoreFrameSize parses a DTS core frame header and returns the frame size
// in bytes. The data must start at the DTS sync word (7F FE 80 01) and be at
// least 7 bytes long. Returns 0 if the header is invalid.
//
// DTS core frame header layout (after 4-byte sync word):
//
//	Bit 0:      frame_type (1 bit)
//	Bits 1-5:   deficit_samples (5 bits)
//	Bit 6:      crc_present (1 bit)
//	Bits 7-13:  npcmblocks (7 bits)
//	Bits 14-27: frame_size - 1 (14 bits)
//
// Reference: ETSI TS 102 114 (DTS Coherent Acoustics), confirmed against
// ffmpeg's ff_dca_parse_core_frame_header in libavcodec/dca.c.
func DTSCoreFrameSize(data []byte) int {
	// Need the 4-byte sync word plus 3 header bytes.
	if len(data) < 7 {
		return 0
	}
	if !(data[0] == 0x7F && data[1] == 0xFE && data[2] == 0x80 && data[3] == 0x01) {
		return 0
	}
	// frame_size-1 is a 14-bit field: the low 7 bits of byte 5 are the high
	// half, the top 7 bits of byte 6 are the low half.
	hi := int(data[5] & 0x7F)
	lo := int(data[6] >> 1)
	size := (hi<<7 | lo) + 1
	if size < 96 {
		return 0 // Too small to be a valid DTS frame
	}
	return size
}
// FindAllSyncPoints finds both video start codes and audio sync patterns.
// Returns combined offsets sorted by position.
//
// Fix: the original allocated a `combined` slice and appended both input
// lists into it, but never read it — the two-pointer merge below already
// produces the result. The dead slice and its copies are removed.
func FindAllSyncPoints(data []byte) []int {
	videoOffsets := FindVideoStartCodes(data)
	audioOffsets := FindAudioSyncPoints(data)
	// Both lists are already sorted by position, so a standard two-pointer
	// merge yields the combined sorted result in a single pass.
	result := make([]int, 0, len(videoOffsets)+len(audioOffsets))
	vi, ai := 0, 0
	for vi < len(videoOffsets) || ai < len(audioOffsets) {
		switch {
		case vi >= len(videoOffsets):
			result = append(result, audioOffsets[ai])
			ai++
		case ai >= len(audioOffsets):
			result = append(result, videoOffsets[vi])
			vi++
		case videoOffsets[vi] <= audioOffsets[ai]:
			result = append(result, videoOffsets[vi])
			vi++
		default:
			result = append(result, audioOffsets[ai])
			ai++
		}
	}
	return result
}
package source
import (
"encoding/binary"
"fmt"
"os"
"path/filepath"
"strings"
)
// parseBlurayClipInfoCodecs parses a CLPI file's ProgramInfo section to extract
// codec information. CLPI files are small metadata files in BDMV/CLIPINF/ that
// authoritatively declare every elementary stream's codec type.
//
// CLPI header layout:
//
//	0x00-0x03: Type indicator ("HDMV")
//	0x04-0x07: Version string
//	0x08-0x0B: SequenceInfo start offset (4 bytes, big-endian)
//	0x0C-0x0F: ProgramInfo start offset (4 bytes, big-endian)
//
// ProgramInfo layout:
//
//	[0-3] Section length (4 bytes, big-endian)
//	[4]   Reserved
//	[5]   Number of program sequences
//	Per sequence:
//	  [0-3] SPN_program_sequence_start (4 bytes)
//	  [4-5] program_map_PID (2 bytes)
//	  [6]   num_streams_in_ps (1 byte)
//	  [7]   num_groups (1 byte)
//	Per stream:
//	  [0-1] stream_PID (2 bytes)
//	  [2]   stream_coding_info_length (1 byte)
//	  [3]   stream_coding_type (1 byte) — same values as tsStreamTypeToCodecType
func parseBlurayClipInfoCodecs(data []byte) (*SourceCodecs, error) {
	// Need at least the fixed 16-byte CLPI header.
	if len(data) < 16 {
		return nil, fmt.Errorf("CLPI data too short (%d bytes)", len(data))
	}
	magic := string(data[0:4])
	if magic != "HDMV" {
		return nil, fmt.Errorf("not a CLPI file (magic: %q)", magic)
	}
	progInfoOffset := binary.BigEndian.Uint32(data[12:16])
	// The section needs at least its 4-byte length + reserved + count bytes.
	if progInfoOffset == 0 || int(progInfoOffset)+6 > len(data) {
		return nil, fmt.Errorf("invalid ProgramInfo offset: %d", progInfoOffset)
	}
	pi := data[progInfoOffset:]
	if len(pi) < 6 {
		return nil, fmt.Errorf("ProgramInfo section too short")
	}
	piLen := binary.BigEndian.Uint32(pi[0:4])
	if piLen == 0 {
		return nil, fmt.Errorf("empty ProgramInfo section")
	}
	// Cap the section to its declared length + header so a malformed
	// length field cannot let the loops read past the section.
	pi = pi[:min(int(piLen)+4, len(pi))]
	numSeqs := int(pi[5])
	codecs := &SourceCodecs{}
	off := 6
	for range numSeqs {
		// Each sequence header is 8 bytes; stop cleanly on truncation.
		if off+8 > len(pi) {
			break
		}
		// SPN(4) + program_map_PID(2) + num_streams(1) + num_groups(1)
		//
		// num_groups (pi[off+7]) is not processed. The Blu-ray spec is proprietary
		// and the group entry format is undocumented. In practice num_groups is
		// always 0 on real discs, and no open-source parser (libbluray, MKVToolNix)
		// processes group entries either — the field is effectively reserved.
		numStreams := int(pi[off+6])
		off += 8
		for range numStreams {
			if off+3 > len(pi) {
				break
			}
			// stream_PID(2) + ci_len(1)
			ciLen := int(pi[off+2])
			if ciLen > 0 && off+3 < len(pi) {
				// First byte of the coding info is the stream_coding_type,
				// which maps onto a codec family; dedupe per category.
				streamType := pi[off+3]
				ct := tsStreamTypeToCodecType(streamType)
				if ct != CodecUnknown {
					if IsVideoCodec(ct) {
						if !containsCodec(codecs.VideoCodecs, ct) {
							codecs.VideoCodecs = append(codecs.VideoCodecs, ct)
						}
					} else if IsAudioCodec(ct) {
						if !containsCodec(codecs.AudioCodecs, ct) {
							codecs.AudioCodecs = append(codecs.AudioCodecs, ct)
						}
					} else if IsSubtitleCodec(ct) {
						if !containsCodec(codecs.SubtitleCodecs, ct) {
							codecs.SubtitleCodecs = append(codecs.SubtitleCodecs, ct)
						}
					}
				}
			}
			// Advance past PID + length byte + the coding info payload.
			off += 3 + ciLen
		}
	}
	return codecs, nil
}
// findCLPIsInISO navigates an ISO9660 filesystem to find CLPI files
// under BDMV/CLIPINF/. Returns the file extents or an error.
// The traversal is root → BDMV → CLIPINF, one directory level at a time.
func findCLPIsInISO(f *os.File) ([]isoFileExtent, error) {
	// Locate the root directory via the Primary Volume Descriptor.
	rootLBA, rootLen, err := readISOPVDRoot(f)
	if err != nil {
		return nil, err
	}
	rootEntries, err := readISODirectory(f, rootLBA, rootLen)
	if err != nil {
		return nil, fmt.Errorf("read ISO root directory: %w", err)
	}
	bdmv, err := findISOEntry(rootEntries, "BDMV")
	if err != nil {
		return nil, fmt.Errorf("find BDMV directory: %w", err)
	}
	// Entries store byte offsets; convert back to LBA sectors for reading.
	bdmvEntries, err := readISODirectory(f, uint32(bdmv.Offset/isoSectorSize), uint32(bdmv.Size))
	if err != nil {
		return nil, fmt.Errorf("read BDMV directory: %w", err)
	}
	clipinf, err := findISOEntry(bdmvEntries, "CLIPINF")
	if err != nil {
		return nil, fmt.Errorf("find CLIPINF directory: %w", err)
	}
	clipinfEntries, err := readISODirectory(f, uint32(clipinf.Offset/isoSectorSize), uint32(clipinf.Size))
	if err != nil {
		return nil, fmt.Errorf("read CLIPINF directory: %w", err)
	}
	// Collect every file entry with a .CLPI suffix.
	var clpis []isoFileExtent
	for _, e := range clipinfEntries {
		if !e.IsDir && strings.HasSuffix(e.Name, ".CLPI") {
			clpis = append(clpis, e)
		}
	}
	if len(clpis) == 0 {
		return nil, fmt.Errorf("no CLPI files found in BDMV/CLIPINF/")
	}
	return clpis, nil
}
// findCLPIsInUDF navigates a UDF filesystem to find CLPI files under BDMV/CLIPINF/.
// Mirrors findCLPIsInISO, but walks UDF file identifier descriptors instead.
func findCLPIsInUDF(f *os.File) ([]isoFileExtent, error) {
	ctx, err := newUDFContext(f)
	if err != nil {
		return nil, err
	}
	rootFIDs, err := ctx.readDirectoryFromFE(ctx.rootFE)
	if err != nil {
		return nil, fmt.Errorf("read UDF root directory: %w", err)
	}
	bdmvFE, err := ctx.lookupDir(rootFIDs, "BDMV")
	if err != nil {
		return nil, fmt.Errorf("find BDMV: %w", err)
	}
	bdmvFIDs, err := ctx.readDirectoryFromFE(bdmvFE)
	if err != nil {
		return nil, fmt.Errorf("read BDMV directory: %w", err)
	}
	clipinfFE, err := ctx.lookupDir(bdmvFIDs, "CLIPINF")
	if err != nil {
		return nil, fmt.Errorf("find CLIPINF: %w", err)
	}
	clipinfFIDs, err := ctx.readDirectoryFromFE(clipinfFE)
	if err != nil {
		return nil, fmt.Errorf("read CLIPINF directory: %w", err)
	}
	var clpis []isoFileExtent
	for _, fid := range clipinfFIDs {
		// Skip subdirectories and the parent-directory entry.
		if fid.IsDir || fid.IsParent {
			continue
		}
		// Case-insensitive match on the .CLPI extension.
		name := strings.ToUpper(fid.Name)
		if !strings.HasSuffix(name, ".CLPI") {
			continue
		}
		// Best-effort: skip entries whose file entry or extents cannot
		// be resolved rather than failing the whole scan.
		fe, err := ctx.readFileEntryAt(fid.ICBLocation)
		if err != nil {
			continue
		}
		extents, err := ctx.resolveAllExtents(fe)
		if err != nil || len(extents) == 0 {
			continue
		}
		clpi := isoFileExtent{
			Name:   name,
			Offset: extents[0].ISOOffset,
			Size:   int64(fe.InfoLength),
			IsDir:  false,
		}
		// Only record the extent list for fragmented files; a contiguous
		// file is fully described by Offset+Size.
		if !extentsContiguous(extents) {
			clpi.Extents = extents
		}
		clpis = append(clpis, clpi)
	}
	if len(clpis) == 0 {
		return nil, fmt.Errorf("no CLPI files found in UDF BDMV/CLIPINF/")
	}
	return clpis, nil
}
// detectBlurayCodecsFromCLPIs reads CLPI files from within an ISO and returns
// the unioned codec information from all clip info files. Individual CLPI
// failures are tolerated; an error is returned only when none parse.
func detectBlurayCodecsFromCLPIs(f *os.File, clpis []isoFileExtent) (*SourceCodecs, error) {
	// Cap read size to prevent excessive allocation from malformed metadata.
	// Real CLPI files are ~64-78KB.
	const maxCLPISize int64 = 8 * 1024 * 1024

	merged := &SourceCodecs{}
	var lastErr error
	parsed := 0
	for _, clpi := range clpis {
		data, err := readISOFileExtent(f, clpi, maxCLPISize)
		if err != nil {
			lastErr = err
			continue
		}
		codecs, err := parseBlurayClipInfoCodecs(data)
		if err != nil {
			lastErr = err
			continue
		}
		mergeSourceCodecs(merged, codecs)
		parsed++
	}
	if parsed == 0 {
		if lastErr != nil {
			return nil, fmt.Errorf("failed to parse any CLPI file: %w", lastErr)
		}
		return nil, fmt.Errorf("no valid CLPI files found")
	}
	return merged, nil
}
// detectBlurayCodecsFromCLPIDir detects codecs from CLPI files in an extracted
// Blu-ray directory structure (BDMV/CLIPINF/*.clpi). Individual CLPI failures
// are tolerated; an error is returned only when none parse.
func detectBlurayCodecsFromCLPIDir(sourceDir string) (*SourceCodecs, error) {
	clipinfDir := sourceDir
	if !strings.HasSuffix(strings.ToUpper(sourceDir), "CLIPINF") {
		// sourceDir is the disc root; locate BDMV/CLIPINF beneath it,
		// accepting either upper- or lower-case layouts.
		clipinfDir = ""
		for _, cand := range []string{
			filepath.Join(sourceDir, "BDMV", "CLIPINF"),
			filepath.Join(sourceDir, "bdmv", "clipinf"),
		} {
			if info, err := os.Stat(cand); err == nil && info.IsDir() {
				clipinfDir = cand
				break
			}
		}
		if clipinfDir == "" {
			return nil, fmt.Errorf("BDMV/CLIPINF directory not found in %s", sourceDir)
		}
	}
	entries, err := os.ReadDir(clipinfDir)
	if err != nil {
		return nil, fmt.Errorf("read CLIPINF directory: %w", err)
	}
	merged := &SourceCodecs{}
	var lastErr error
	parsed := 0
	for _, entry := range entries {
		// Only regular files with a .CLPI extension (any case).
		if entry.IsDir() || !strings.HasSuffix(strings.ToUpper(entry.Name()), ".CLPI") {
			continue
		}
		data, err := os.ReadFile(filepath.Join(clipinfDir, entry.Name()))
		if err != nil {
			lastErr = err
			continue
		}
		codecs, err := parseBlurayClipInfoCodecs(data)
		if err != nil {
			lastErr = err
			continue
		}
		mergeSourceCodecs(merged, codecs)
		parsed++
	}
	if parsed == 0 {
		if lastErr != nil {
			return nil, fmt.Errorf("failed to parse any CLPI file: %w", lastErr)
		}
		return nil, fmt.Errorf("no valid CLPI files found")
	}
	return merged, nil
}
package source
import (
	"fmt"
	"os"
	"path/filepath"
	"slices"
	"strings"

	"github.com/stuckj/mkvdup/internal/mkv"
)
// CodecType represents a broad codec family.
//
// Values are assigned by iota, so declaration order below is significant:
// append new codecs at the end rather than inserting, to keep existing
// values stable.
type CodecType int

// Codec type constants.
const (
	CodecUnknown CodecType = iota // unrecognized or undeterminable codec

	// Video codec families.
	CodecMPEG1Video
	CodecMPEG2Video
	CodecH264Video
	CodecH265Video
	CodecVC1Video

	// Audio codec families.
	CodecAC3Audio
	CodecEAC3Audio
	CodecDTSAudio
	CodecDTSHDAudio
	CodecTrueHDAudio
	CodecLPCMAudio
	CodecMPEGAudio
	CodecAACaudio // NOTE(review): casing is inconsistent with the other *Audio names, but it is exported — kept for compatibility
	CodecFLACAudio
	CodecOpusAudio

	// Subtitle codec families.
	CodecPGSSubtitle
)
// CodecTypeName returns a human-readable name for a codec type.
// Unrecognized values (including CodecUnknown) map to "Unknown".
func CodecTypeName(ct CodecType) string {
	names := map[CodecType]string{
		CodecMPEG1Video:  "MPEG-1",
		CodecMPEG2Video:  "MPEG-2",
		CodecH264Video:   "H.264",
		CodecH265Video:   "H.265",
		CodecVC1Video:    "VC-1",
		CodecAC3Audio:    "AC3",
		CodecEAC3Audio:   "E-AC3",
		CodecDTSAudio:    "DTS",
		CodecDTSHDAudio:  "DTS-HD",
		CodecTrueHDAudio: "TrueHD",
		CodecLPCMAudio:   "LPCM",
		CodecMPEGAudio:   "MPEG Audio",
		CodecAACaudio:    "AAC",
		CodecFLACAudio:   "FLAC",
		CodecOpusAudio:   "Opus",
		CodecPGSSubtitle: "PGS",
	}
	if name, ok := names[ct]; ok {
		return name
	}
	return "Unknown"
}
// IsVideoCodec returns true if the codec type is a video codec.
func IsVideoCodec(ct CodecType) bool {
	return ct == CodecMPEG1Video || ct == CodecMPEG2Video || ct == CodecH264Video ||
		ct == CodecH265Video || ct == CodecVC1Video
}
// IsSubtitleCodec returns true if the codec type is a subtitle codec.
// PGS is currently the only subtitle family tracked.
func IsSubtitleCodec(ct CodecType) bool {
	switch ct {
	case CodecPGSSubtitle:
		return true
	default:
		return false
	}
}
// IsAudioCodec returns true if the codec type is an audio codec.
func IsAudioCodec(ct CodecType) bool {
	return ct == CodecAC3Audio || ct == CodecEAC3Audio || ct == CodecDTSAudio ||
		ct == CodecDTSHDAudio || ct == CodecTrueHDAudio || ct == CodecLPCMAudio ||
		ct == CodecMPEGAudio || ct == CodecAACaudio || ct == CodecFLACAudio ||
		ct == CodecOpusAudio
}
// MKVCodecToType maps an MKV CodecID string to a CodecType.
// Exact IDs are resolved first, then prefix families (DTS-HD variants,
// PCM layouts, MPEG audio layers, AAC profiles). Anything else — including
// "V_MS/VFW/FOURCC", which could be VC-1 or another codec and cannot be
// determined without codec private data — maps to CodecUnknown.
func MKVCodecToType(codecID string) CodecType {
	exact := map[string]CodecType{
		"V_MPEG1":          CodecMPEG1Video,
		"V_MPEG2":          CodecMPEG2Video,
		"V_MPEG4/ISO/AVC":  CodecH264Video,
		"V_MPEGH/ISO/HEVC": CodecH265Video,
		"A_AC3":            CodecAC3Audio,
		"A_EAC3":           CodecEAC3Audio,
		"A_DTS":            CodecDTSAudio,
		"A_TRUEHD":         CodecTrueHDAudio,
		"A_FLAC":           CodecFLACAudio,
		"A_OPUS":           CodecOpusAudio,
		"S_HDMV/PGS":       CodecPGSSubtitle,
	}
	if ct, ok := exact[codecID]; ok {
		return ct
	}
	switch {
	case strings.HasPrefix(codecID, "A_DTS/"):
		// A_DTS/EXPRESS, A_DTS/LOSSLESS, etc.
		return CodecDTSHDAudio
	case strings.HasPrefix(codecID, "A_PCM/"):
		// A_PCM/INT/LIT, A_PCM/INT/BIG, A_PCM/FLOAT/IEEE
		return CodecLPCMAudio
	case strings.HasPrefix(codecID, "A_MPEG/"):
		// A_MPEG/L2, A_MPEG/L3
		return CodecMPEGAudio
	case strings.HasPrefix(codecID, "A_AAC"):
		// A_AAC, A_AAC/MPEG2/MAIN, etc.
		return CodecAACaudio
	}
	return CodecUnknown
}
// SourceCodecs describes the codecs found in a source media.
// Each slice holds the deduplicated set of codec families detected for that
// track class. An empty slice means "no information available" rather than
// "no tracks" — CheckCodecCompatibility skips comparison for empty slices.
type SourceCodecs struct {
	VideoCodecs    []CodecType
	AudioCodecs    []CodecType
	SubtitleCodecs []CodecType
}
// CodecMismatch describes a detected codec mismatch between MKV and source.
type CodecMismatch struct {
	TrackType    string      // "video", "audio", or "subtitle" (as emitted by CheckCodecCompatibility)
	MKVCodecID   string      // e.g. "V_MPEG4/ISO/AVC"
	MKVCodecType CodecType   // resolved codec type
	SourceCodecs []CodecType // codecs found in source for this track type
}
// DetectSourceCodecs determines what codecs are present in the source media.
// For DVD sources, it extracts codec info from the already-parsed MPEG-PS data.
// For Blu-ray sources, it performs a lightweight PMT scan of the first M2TS file.
func DetectSourceCodecs(index *Index) (*SourceCodecs, error) {
	if index.SourceType == TypeDVD {
		return detectDVDCodecs(index)
	}
	if index.SourceType == TypeBluray {
		return detectBlurayCodecs(index)
	}
	return nil, fmt.Errorf("unknown source type")
}
// DetectSourceCodecsFromDir performs a lightweight codec detection from a source
// directory without building the full hash index. This allows codec compatibility
// checks to run before the expensive indexing step.
//
// For Blu-ray sources, this scans the PMTs of all M2TS files of significant size
// (>10% of the largest) and unions their codecs. This is necessary because
// different episodes or playlist entries may reference different M2TS files with
// different audio tracks (e.g., a stereo AC3 track may only appear in certain
// episode M2TS files, not in the largest one).
func DetectSourceCodecsFromDir(sourceDir string) (*SourceCodecs, error) {
	sourceType, err := DetectType(sourceDir)
	if err != nil {
		return nil, fmt.Errorf("detect source type: %w", err)
	}
	files, err := EnumerateMediaFiles(sourceDir, sourceType)
	if err != nil {
		return nil, fmt.Errorf("enumerate files: %w", err)
	}
	if len(files) == 0 {
		return nil, fmt.Errorf("no media files found in %s", sourceDir)
	}
	// Stat every enumerated file; unreadable entries are silently dropped.
	type statEntry struct {
		relPath string
		size    int64
	}
	entries := make([]statEntry, 0, len(files))
	var largest int64
	for _, relPath := range files {
		info, statErr := os.Stat(filepath.Join(sourceDir, relPath))
		if statErr != nil {
			continue
		}
		entries = append(entries, statEntry{relPath: relPath, size: info.Size()})
		if sz := info.Size(); sz > largest {
			largest = sz
		}
	}
	if len(entries) == 0 {
		return nil, fmt.Errorf("no accessible media files found")
	}
	switch sourceType {
	case TypeBluray:
		// Prefer CLPI metadata for extracted Blu-ray directories.
		if codecs, clpiErr := detectBlurayCodecsFromCLPIDir(sourceDir); clpiErr == nil {
			return codecs, nil
		}
		// Fallback: scan PMTs from the M2TS data itself.
		targets := make([]codecScanTarget, 0, len(entries))
		for _, e := range entries {
			targets = append(targets, codecScanTarget{
				Path: filepath.Join(sourceDir, e.relPath),
				Size: e.size,
			})
		}
		return detectBlurayCodecsMulti(significantTargets(targets))
	case TypeDVD:
		// For DVDs, probe the largest file (the main feature).
		var mainFeature string
		for _, e := range entries {
			if e.size == largest {
				mainFeature = e.relPath
				break
			}
		}
		return detectDVDCodecsFromFile(filepath.Join(sourceDir, mainFeature))
	default:
		return nil, fmt.Errorf("unknown source type")
	}
}
// CheckCodecCompatibility compares MKV track codecs against source codecs.
// Returns nil if all codecs are compatible, or a list of mismatches.
// Tracks with unknown codec IDs are skipped (no false alarms), as are track
// classes for which the source provides no codec information.
func CheckCodecCompatibility(tracks []mkv.Track, sourceCodecs *SourceCodecs) []CodecMismatch {
	var mismatches []CodecMismatch
	for _, track := range tracks {
		ct := MKVCodecToType(track.CodecID)
		if ct == CodecUnknown {
			continue // Skip unknown codecs — no false alarms
		}
		// Resolve the track to its class label and the matching source list.
		var label string
		var pool []CodecType
		switch {
		case track.Type == mkv.TrackTypeVideo && IsVideoCodec(ct):
			label, pool = "video", sourceCodecs.VideoCodecs
		case track.Type == mkv.TrackTypeAudio && IsAudioCodec(ct):
			label, pool = "audio", sourceCodecs.AudioCodecs
		case track.Type == mkv.TrackTypeSubtitle && IsSubtitleCodec(ct):
			label, pool = "subtitle", sourceCodecs.SubtitleCodecs
		default:
			continue
		}
		if len(pool) == 0 {
			continue // No source info available for this track class
		}
		if codecFamilyMatch(ct, pool) {
			continue
		}
		mismatches = append(mismatches, CodecMismatch{
			TrackType:    label,
			MKVCodecID:   track.CodecID,
			MKVCodecType: ct,
			SourceCodecs: pool,
		})
	}
	return mismatches
}
// codecFamilyMatch checks if a codec type is compatible with any codec in the list.
// Uses family-based matching (e.g., DTS is compatible with DTS-HD).
func codecFamilyMatch(ct CodecType, sourceCodecs []CodecType) bool {
	want := codecFamily(ct)
	for i := range sourceCodecs {
		if codecFamily(sourceCodecs[i]) == want {
			return true
		}
	}
	return false
}
// codecFamily returns the codec family for family-based matching.
// Related codecs map to the same family value (AC3/E-AC3, DTS/DTS-HD,
// MPEG-1/MPEG-2 video); unrecognized codecs fall into family 0.
func codecFamily(ct CodecType) int {
	families := map[CodecType]int{
		CodecMPEG1Video: 1, CodecMPEG2Video: 1,
		CodecH264Video: 2,
		CodecH265Video: 3,
		CodecVC1Video:  4,
		CodecAC3Audio: 10, CodecEAC3Audio: 10,
		CodecDTSAudio: 11, CodecDTSHDAudio: 11,
		CodecTrueHDAudio: 12,
		CodecLPCMAudio:   13,
		CodecMPEGAudio:   14,
		CodecAACaudio:    15,
		CodecFLACAudio:   16,
		CodecOpusAudio:   17,
		CodecPGSSubtitle: 20,
	}
	return families[ct] // missing keys yield 0, the "unknown" family
}
// containsCodec reports whether ct is already present in codecs.
// Uses slices.Contains (Go 1.21+) instead of a hand-rolled linear scan;
// the module already relies on Go 1.21 builtins (min) elsewhere.
func containsCodec(codecs []CodecType, ct CodecType) bool {
	return slices.Contains(codecs, ct)
}
// codecScanTarget describes a file to scan for codec detection.
// Unlike isoFileExtent (which represents an ISO directory entry with an
// uppercase ISO filename), this is used for on-disk paths that may be
// M2TS files, ISOs, or other media files.
// Size is carried separately so significantTargets can filter by relative
// size without re-statting files.
type codecScanTarget struct {
	Path string // filesystem path
	Size int64  // file size in bytes
}
// significantFiles returns the subset of ISO file extents whose size is at
// least 10% of the largest. Used for filtering M2TS/VOB entries within ISOs.
func significantFiles(files []isoFileExtent) []isoFileExtent {
	var largest int64
	for i := range files {
		if files[i].Size > largest {
			largest = files[i].Size
		}
	}
	threshold := largest / 10
	var kept []isoFileExtent
	for _, f := range files {
		if f.Size >= threshold {
			kept = append(kept, f)
		}
	}
	return kept
}
// significantTargets returns the subset of scan targets whose size is at
// least 10% of the largest. Used for filtering on-disk files for codec detection.
func significantTargets(targets []codecScanTarget) []codecScanTarget {
	var largest int64
	for i := range targets {
		if targets[i].Size > largest {
			largest = targets[i].Size
		}
	}
	threshold := largest / 10
	var kept []codecScanTarget
	for _, t := range targets {
		if t.Size >= threshold {
			kept = append(kept, t)
		}
	}
	return kept
}
// mergeSourceCodecs adds all codecs from src into dst, deduplicating.
func mergeSourceCodecs(dst, src *SourceCodecs) {
	// appendMissing extends have with every codec from add not already present.
	appendMissing := func(have, add []CodecType) []CodecType {
		for _, c := range add {
			if !containsCodec(have, c) {
				have = append(have, c)
			}
		}
		return have
	}
	dst.VideoCodecs = appendMissing(dst.VideoCodecs, src.VideoCodecs)
	dst.AudioCodecs = appendMissing(dst.AudioCodecs, src.AudioCodecs)
	dst.SubtitleCodecs = appendMissing(dst.SubtitleCodecs, src.SubtitleCodecs)
}
package source
import (
"encoding/binary"
"fmt"
"os"
"strings"
)
// parseDVDIFOCodecs parses a VTS_xx_0.IFO file's VTS_MAT structure to extract
// video and audio codec information. The IFO file authoritatively declares
// every stream in the title set, unlike PES scanning which can miss streams
// that appear later in the VOB data.
//
// VTS_MAT layout (relevant offsets):
//
//	0x000-0x00B: "DVDVIDEO-VTS" identifier
//	0x200-0x201: VTS video attributes (2 bytes)
//	0x202-0x203: Number of VTS audio streams (2 bytes, big-endian)
//	0x204-0x243: VTS audio stream attributes (8 bytes each, max 8)
func parseDVDIFOCodecs(data []byte) (*SourceCodecs, error) {
	const (
		minIFOSize    = 0x244
		videoAttrOff  = 0x200
		audioCountOff = 0x202
		audioAttrOff  = 0x204
		audioAttrSize = 8
		maxAudioSlots = 8
	)
	if len(data) < minIFOSize {
		return nil, fmt.Errorf("IFO data too short (%d bytes)", len(data))
	}
	if magic := string(data[0:12]); magic != "DVDVIDEO-VTS" {
		return nil, fmt.Errorf("not a VTS IFO file (magic: %q)", magic)
	}
	codecs := &SourceCodecs{}
	// Video attributes: bits 15-14 hold the compression mode
	// (0=MPEG-1, 1=MPEG-2); other values are left unreported.
	videoAttr := binary.BigEndian.Uint16(data[videoAttrOff : videoAttrOff+2])
	switch (videoAttr >> 14) & 0x03 {
	case 0:
		codecs.VideoCodecs = append(codecs.VideoCodecs, CodecMPEG1Video)
	case 1:
		codecs.VideoCodecs = append(codecs.VideoCodecs, CodecMPEG2Video)
	}
	// Audio stream count, clamped to the 8 attribute slots.
	numAudio := min(int(binary.BigEndian.Uint16(data[audioCountOff:audioCountOff+2])), maxAudioSlots)
	for i := 0; i < numAudio; i++ {
		attr := data[audioAttrOff+i*audioAttrSize:]
		// Skip all-zero entries (unused slots).
		if attr[0] == 0 && attr[1] == 0 {
			continue
		}
		// Byte 0, bits 7-5: audio coding mode.
		var ct CodecType
		switch (attr[0] >> 5) & 0x07 {
		case 0:
			ct = CodecAC3Audio
		case 2, 3:
			ct = CodecMPEGAudio // MPEG-1 and MPEG-2ext
		case 4:
			ct = CodecLPCMAudio
		case 6:
			ct = CodecDTSAudio
		default:
			continue
		}
		if !containsCodec(codecs.AudioCodecs, ct) {
			codecs.AudioCodecs = append(codecs.AudioCodecs, ct)
		}
	}
	return codecs, nil
}
// findIFOsInISO navigates an ISO9660 filesystem to find VTS IFO files
// (VTS_xx_0.IFO) under the VIDEO_TS directory. Returns nil if navigation fails.
func findIFOsInISO(f *os.File) []isoFileExtent {
	rootLBA, rootLen, err := readISOPVDRoot(f)
	if err != nil {
		return nil
	}
	rootEntries, err := readISODirectory(f, rootLBA, rootLen)
	if err != nil {
		return nil
	}
	videoTS, err := findISOEntry(rootEntries, "VIDEO_TS")
	if err != nil {
		return nil
	}
	entries, err := readISODirectory(f, uint32(videoTS.Offset/isoSectorSize), uint32(videoTS.Size))
	if err != nil {
		return nil
	}
	var ifos []isoFileExtent
	for _, entry := range entries {
		if entry.IsDir {
			continue
		}
		// Match the VTS_xx_0.IFO pattern (e.g., VTS_01_0.IFO):
		// exactly 12 chars, title-set number followed by "_0", .IFO extension.
		name := entry.Name
		if len(name) == 12 && name[7] == '0' &&
			strings.HasPrefix(name, "VTS_") && strings.HasSuffix(name, ".IFO") {
			ifos = append(ifos, entry)
		}
	}
	return ifos
}
// findIFOsInUDF navigates a UDF filesystem to find VTS IFO files under VIDEO_TS.
// Entries that cannot be resolved (unreadable file entry, zero length, no
// extents) are skipped; an error is returned only when navigation fails or no
// IFO survives the filter.
func findIFOsInUDF(f *os.File) ([]isoFileExtent, error) {
	ctx, err := newUDFContext(f)
	if err != nil {
		return nil, err
	}
	rootFIDs, err := ctx.readDirectoryFromFE(ctx.rootFE)
	if err != nil {
		return nil, fmt.Errorf("read UDF root directory: %w", err)
	}
	vtsFE, err := ctx.lookupDir(rootFIDs, "VIDEO_TS")
	if err != nil {
		return nil, fmt.Errorf("find VIDEO_TS: %w", err)
	}
	vtsFIDs, err := ctx.readDirectoryFromFE(vtsFE)
	if err != nil {
		return nil, fmt.Errorf("read VIDEO_TS directory: %w", err)
	}
	var ifos []isoFileExtent
	for _, fid := range vtsFIDs {
		if fid.IsDir || fid.IsParent {
			continue
		}
		// Match VTS_xx_0.IFO (title-set IFO), case-insensitively.
		name := strings.ToUpper(fid.Name)
		if len(name) != 12 || name[7] != '0' ||
			!strings.HasPrefix(name, "VTS_") || !strings.HasSuffix(name, ".IFO") {
			continue
		}
		fe, feErr := ctx.readFileEntryAt(fid.ICBLocation)
		if feErr != nil || fe.InfoLength == 0 {
			continue
		}
		extents, exErr := ctx.resolveAllExtents(fe)
		if exErr != nil || len(extents) == 0 {
			continue
		}
		entry := isoFileExtent{
			Name:   name,
			Offset: extents[0].ISOOffset,
			Size:   int64(fe.InfoLength),
			IsDir:  false,
		}
		// Only record the extent list when the file is fragmented.
		if !extentsContiguous(extents) {
			entry.Extents = extents
		}
		ifos = append(ifos, entry)
	}
	if len(ifos) == 0 {
		return nil, fmt.Errorf("no VTS IFO files found in UDF VIDEO_TS/")
	}
	return ifos, nil
}
// detectDVDCodecsFromIFOs reads IFO files from within an ISO and returns
// the unioned codec information from all title sets. An error is returned
// only when no IFO could be read and parsed.
func detectDVDCodecsFromIFOs(f *os.File, ifos []isoFileExtent) (*SourceCodecs, error) {
	// VTS_MAT parsing only needs the first 0x244 bytes of each IFO.
	const maxIFOReadSize int64 = 0x244

	union := &SourceCodecs{}
	parsed := 0
	var lastErr error
	for _, entry := range ifos {
		raw, readErr := readISOFileExtent(f, entry, maxIFOReadSize)
		if readErr != nil {
			lastErr = readErr
			continue
		}
		info, parseErr := parseDVDIFOCodecs(raw)
		if parseErr != nil {
			lastErr = parseErr
			continue
		}
		mergeSourceCodecs(union, info)
		parsed++
	}
	if parsed == 0 {
		if lastErr != nil {
			return nil, fmt.Errorf("failed to parse any VTS IFO: %w", lastErr)
		}
		return nil, fmt.Errorf("no valid VTS IFO files found")
	}
	return union, nil
}
package source
import "fmt"
// binarySearchRanges performs binary search on PES payload ranges to find the one
// containing the given ES offset. Returns the index, or -1 if not found
// (including for an empty range list).
func binarySearchRanges(ranges []PESPayloadRange, esOffset int64) int {
	lo, hi := 0, len(ranges)-1
	for lo <= hi {
		mid := lo + (hi-lo)/2 // overflow-safe midpoint
		start := ranges[mid].ESOffset
		end := start + int64(ranges[mid].Size)
		switch {
		case esOffset < start:
			hi = mid - 1
		case esOffset >= end:
			lo = mid + 1
		default:
			return mid
		}
	}
	return -1
}
// readByteAt reads a single byte at the given file offset, using the
// multi-region view when present, otherwise the flat data slice.
func readByteAt(data []byte, mr *multiRegionData, fileOffset int64) byte {
	if mr == nil {
		return data[fileOffset]
	}
	return mr.ByteAt(fileOffset)
}
// readByteWithHint reads a single byte from a set of PES payload ranges using a hint
// for O(1) sequential access. Returns the byte, the range index for the next hint,
// and success status. Pass rangeHint=-1 to force binary search.
// When mr is non-nil, byte reads use the multi-region data instead of data.
//
// Hot path: callers read ES offsets mostly sequentially, so the hinted range
// and its immediate neighbors are checked before falling back to a binary
// search over all ranges. The hint/next/prev check order below is the
// optimization — do not reorder.
func readByteWithHint(data []byte, mr *multiRegionData, dataSize int64, ranges []PESPayloadRange, esOffset int64, rangeHint int) (byte, int, bool) {
	if len(ranges) == 0 {
		return 0, -1, false
	}
	// Fast path: check if hint is still valid (O(1) check)
	if rangeHint >= 0 && rangeHint < len(ranges) {
		r := ranges[rangeHint]
		if esOffset >= r.ESOffset && esOffset < r.ESOffset+int64(r.Size) {
			offsetInPayload := esOffset - r.ESOffset
			fileOffset := r.FileOffset + offsetInPayload
			// Bounds check against the underlying file before dereferencing.
			if fileOffset >= 0 && fileOffset < dataSize {
				return readByteAt(data, mr, fileOffset), rangeHint, true
			}
		}
		// Check next range (common case when crossing boundaries forward)
		if rangeHint+1 < len(ranges) {
			r = ranges[rangeHint+1]
			if esOffset >= r.ESOffset && esOffset < r.ESOffset+int64(r.Size) {
				offsetInPayload := esOffset - r.ESOffset
				fileOffset := r.FileOffset + offsetInPayload
				if fileOffset >= 0 && fileOffset < dataSize {
					return readByteAt(data, mr, fileOffset), rangeHint + 1, true
				}
			}
		}
		// Check previous range (common case when crossing boundaries backward)
		if rangeHint-1 >= 0 {
			r = ranges[rangeHint-1]
			if esOffset >= r.ESOffset && esOffset < r.ESOffset+int64(r.Size) {
				offsetInPayload := esOffset - r.ESOffset
				fileOffset := r.FileOffset + offsetInPayload
				if fileOffset >= 0 && fileOffset < dataSize {
					return readByteAt(data, mr, fileOffset), rangeHint - 1, true
				}
			}
		}
	}
	// Slow path: binary search
	rangeIdx := binarySearchRanges(ranges, esOffset)
	if rangeIdx < 0 {
		return 0, -1, false
	}
	r := ranges[rangeIdx]
	offsetInPayload := esOffset - r.ESOffset
	fileOffset := r.FileOffset + offsetInPayload
	if fileOffset >= 0 && fileOffset < dataSize {
		return readByteAt(data, mr, fileOffset), rangeIdx, true
	}
	return 0, -1, false
}
// readSliceAt reads a byte slice for the file offset range [fileOffset, endOffset),
// using the multi-region view when present, otherwise the flat data slice.
func readSliceAt(data []byte, mr *multiRegionData, fileOffset, endOffset int64) []byte {
	if mr == nil {
		return data[fileOffset:endOffset]
	}
	return mr.Slice(fileOffset, endOffset)
}
// readFromRanges reads data from PES payload ranges starting at the given ES offset.
// Returns a zero-copy slice when data fits in a single range (common case),
// only copies when data spans multiple ranges.
// When mr is non-nil, data reads use the multi-region data instead of data.
//
// NOTE(review): the multi-range path is best-effort — if a mapped file offset
// falls past dataSize after some bytes were already copied, or the range list
// is exhausted early, the partial result is returned with a nil error.
// Callers must not assume len(result) == size.
func readFromRanges(data []byte, mr *multiRegionData, dataSize int64, ranges []PESPayloadRange, esOffset int64, size int) ([]byte, error) {
	if len(ranges) == 0 {
		return nil, fmt.Errorf("no ranges available")
	}
	// Use binary search to find starting range
	rangeIdx := binarySearchRanges(ranges, esOffset)
	if rangeIdx < 0 {
		// Fallback: linear scan forward past ranges that end at or before
		// esOffset (covers offsets sitting in a gap between ranges).
		rangeIdx = 0
		for rangeIdx < len(ranges) && esOffset >= ranges[rangeIdx].ESOffset+int64(ranges[rangeIdx].Size) {
			rangeIdx++
		}
	}
	if rangeIdx >= len(ranges) {
		return nil, fmt.Errorf("ES offset %d not found in ranges", esOffset)
	}
	r := ranges[rangeIdx]
	if esOffset < r.ESOffset || esOffset >= r.ESOffset+int64(r.Size) {
		return nil, fmt.Errorf("ES offset %d not in range [%d, %d)", esOffset, r.ESOffset, r.ESOffset+int64(r.Size))
	}
	offsetInPayload := esOffset - r.ESOffset
	availableInRange := int64(r.Size) - offsetInPayload
	// Fast path: data fits entirely within this single range (zero-copy)
	if int64(size) <= availableInRange {
		fileOffset := r.FileOffset + offsetInPayload
		endOffset := fileOffset + int64(size)
		if endOffset > dataSize {
			return nil, fmt.Errorf("file offset out of range")
		}
		return readSliceAt(data, mr, fileOffset, endOffset), nil
	}
	// Slow path: data spans multiple ranges — must copy
	result := make([]byte, 0, size)
	remaining := size
	for remaining > 0 && rangeIdx < len(ranges) {
		r := ranges[rangeIdx]
		if esOffset < r.ESOffset {
			// Gap in ES coverage — stop with whatever has been copied.
			break
		}
		if esOffset >= r.ESOffset+int64(r.Size) {
			rangeIdx++
			continue
		}
		offsetInPayload := esOffset - r.ESOffset
		availableInRange := int64(r.Size) - offsetInPayload
		toRead := remaining
		if int64(toRead) > availableInRange {
			toRead = int(availableInRange)
		}
		fileOffset := r.FileOffset + offsetInPayload
		endOffset := fileOffset + int64(toRead)
		if endOffset > dataSize {
			// Out-of-bounds mapping: return the partial prefix if any bytes
			// were already copied, otherwise report the error.
			if len(result) > 0 {
				return result, nil
			}
			return nil, fmt.Errorf("failed to read ES data: offset out of range")
		}
		result = append(result, readSliceAt(data, mr, fileOffset, endOffset)...)
		esOffset += int64(toRead)
		remaining -= toRead
		rangeIdx++
	}
	return result, nil
}
// rawRangesFromPESRanges enumerates raw file ranges for a given ES region.
// Unlike readFromRanges, it fails with an error if the whole [esOffset,
// esOffset+size) region cannot be mapped onto the range list.
func rawRangesFromPESRanges(ranges []PESPayloadRange, esOffset int64, size int) ([]RawRange, error) {
	if len(ranges) == 0 {
		return nil, fmt.Errorf("no ranges available")
	}
	// Use binary search to find the starting range.
	idx := binarySearchRanges(ranges, esOffset)
	if idx < 0 {
		// Linear fallback: skip past every range ending at or before esOffset.
		idx = 0
		for idx < len(ranges) && esOffset >= ranges[idx].ESOffset+int64(ranges[idx].Size) {
			idx++
		}
	}
	if idx >= len(ranges) {
		return nil, fmt.Errorf("ES offset %d not found in ranges", esOffset)
	}
	if first := ranges[idx]; esOffset < first.ESOffset || esOffset >= first.ESOffset+int64(first.Size) {
		return nil, fmt.Errorf("ES offset %d not in range [%d, %d)", esOffset, first.ESOffset, first.ESOffset+int64(first.Size))
	}
	var result []RawRange
	remaining := size
	for remaining > 0 && idx < len(ranges) {
		cur := ranges[idx]
		if esOffset < cur.ESOffset {
			break // gap in ES coverage
		}
		if esOffset >= cur.ESOffset+int64(cur.Size) {
			idx++
			continue
		}
		within := esOffset - cur.ESOffset
		avail := int64(cur.Size) - within
		take := remaining
		if int64(take) > avail {
			take = int(avail)
		}
		result = append(result, RawRange{
			FileOffset: cur.FileOffset + within,
			Size:       take,
		})
		esOffset += int64(take)
		remaining -= take
		idx++
	}
	if remaining > 0 {
		return nil, fmt.Errorf("could not map entire ES region: %d bytes remaining", remaining)
	}
	return result, nil
}
// totalESSizeFromRanges returns the total ES size from a range list:
// the end offset of the final range (ranges are ordered by ESOffset).
func totalESSizeFromRanges(ranges []PESPayloadRange) int64 {
	n := len(ranges)
	if n == 0 {
		return 0
	}
	return ranges[n-1].ESOffset + int64(ranges[n-1].Size)
}
package source
import (
	"errors"
	"fmt"

	"github.com/cespare/xxhash/v2"
	"golang.org/x/sys/unix"
)
// Lookup finds locations in the source that match the given hash.
// Returns nil when the hash has no entries (map miss yields the zero slice).
func (idx *Index) Lookup(hash uint64) []Location {
	return idx.HashToLocations[hash]
}
// ReadESDataAt reads ES data at the given location.
// For sources that use ES offsets, this handles the translation.
// For audio locations, uses the sub-stream ID from the location.
func (idx *Index) ReadESDataAt(loc Location, size int) ([]byte, error) {
	fi := int(loc.FileIndex)
	if fi >= len(idx.ESReaders) || idx.ESReaders[fi] == nil {
		// No ES reader - this shouldn't happen for ES-based indexes
		return nil, fmt.Errorf("no ES reader for file %d", loc.FileIndex)
	}
	reader := idx.ESReaders[fi]
	if !loc.IsVideo {
		// Audio reads go through the sub-stream specific reader.
		return reader.ReadAudioSubStreamData(loc.AudioSubStreamID, loc.Offset, size)
	}
	return reader.ReadESData(loc.Offset, size, true)
}
// hintedESReader is the interface for hint-based byte reading.
// Both MPEGPSParser and MPEGTSParser implement this.
// The returned int is the range hint to feed into the next call;
// passing -1 forces a fresh binary search.
type hintedESReader interface {
	ReadESByteWithHint(esOffset int64, isVideo bool, rangeHint int) (byte, int, bool)
	ReadAudioByteWithHint(subStreamID byte, esOffset int64, rangeHint int) (byte, int, bool)
}
// ReadESByteWithHint reads a single byte from the ES stream, using a range hint
// to avoid binary search when reading sequentially. Returns the byte, the new range
// hint for the next call, and success status. Pass rangeHint=-1 to force binary search.
// This is optimized for the expandMatch hot path where we read bytes sequentially.
func (idx *Index) ReadESByteWithHint(loc Location, rangeHint int) (byte, int, bool) {
	fi := int(loc.FileIndex)
	if fi >= len(idx.ESReaders) || idx.ESReaders[fi] == nil {
		return 0, -1, false
	}
	reader := idx.ESReaders[fi]
	// Fast path: hint-aware readers (MPEGPSParser and MPEGTSParser).
	if hinted, ok := reader.(hintedESReader); ok {
		if loc.IsVideo {
			return hinted.ReadESByteWithHint(loc.Offset, true, rangeHint)
		}
		return hinted.ReadAudioByteWithHint(loc.AudioSubStreamID, loc.Offset, rangeHint)
	}
	// Fallback for any other ESReader: a 1-byte ReadESData (allocates).
	var (
		data []byte
		err  error
	)
	if loc.IsVideo {
		data, err = reader.ReadESData(loc.Offset, 1, true)
	} else {
		data, err = reader.ReadAudioSubStreamData(loc.AudioSubStreamID, loc.Offset, 1)
	}
	if err != nil || len(data) == 0 {
		return 0, -1, false
	}
	return data[0], -1, true
}
// ComputeHash calculates the xxhash (64-bit) of the given data.
func ComputeHash(data []byte) uint64 {
	return xxhash.Sum64(data)
}
// AdviseForMatching sets madvise hints on source mmap'd files before matching.
// For raw-indexed sources (Blu-ray with raw offsets), sets MADV_SEQUENTIAL since
// locality-aware matching produces largely sequential access.
// For ES-indexed sources (DVD MPEG-PS, Blu-ray M2TS with ES offsets), the ES reader
// translates ES offsets to scattered positions in the container file, so MADV_SEQUENTIAL
// would hurt. Uses MADV_NORMAL (default adaptive readahead) instead.
func (idx *Index) AdviseForMatching() {
	if !idx.UsesESOffsets {
		// Raw-indexed: locality-aware matching reads the file largely in order.
		for _, reader := range idx.RawReaders {
			if rr, ok := reader.(*mmapRawReader); ok {
				rr.mmapFile.Advise(unix.MADV_SEQUENTIAL)
			}
		}
		return
	}
	// ES-indexed: ES offsets map to scattered PES packets in the container,
	// so default adaptive readahead beats a sequential hint.
	for _, mmapFile := range idx.MmapFiles {
		if mmapFile != nil {
			mmapFile.Advise(unix.MADV_NORMAL)
		}
	}
}
// Close releases resources held by the index: first the mmap files backing
// the ESReaders and RawReaders, then the raw readers themselves (which also
// close their own mmap files).
//
// Previously all close errors were silently discarded and nil was always
// returned; they are now collected and combined with errors.Join so callers
// can see cleanup failures. All resources are still closed even if earlier
// closes fail.
func (idx *Index) Close() error {
	var errs []error
	// Close all mmap files (these back the ESReaders and RawReaders)
	for _, mmapFile := range idx.MmapFiles {
		if mmapFile != nil {
			if err := mmapFile.Close(); err != nil {
				errs = append(errs, err)
			}
		}
	}
	// Close all raw readers (which also close their mmap files)
	for _, reader := range idx.RawReaders {
		if reader != nil {
			if err := reader.Close(); err != nil {
				errs = append(errs, err)
			}
		}
	}
	// errors.Join returns nil when errs is empty.
	return errors.Join(errs...)
}
package source
import (
"fmt"
"io"
"path/filepath"
"strings"
"github.com/cespare/xxhash/v2"
"github.com/stuckj/mkvdup/internal/mmap"
)
// Hash window bounds. NewIndexerWithOptions clamps requested window sizes
// into [MinWindowSize, MaxWindowSize].
const (
	// DefaultWindowSize is the default number of bytes to hash at each sync point
	DefaultWindowSize = 64
	// MinWindowSize is the minimum allowed window size
	MinWindowSize = 32
	// MaxWindowSize is the maximum allowed window size
	MaxWindowSize = 4096
)
// Indexer builds a hash index from source media files.
type Indexer struct {
	sourceDir      string    // root directory of the source media
	sourceType     Type      // source type detected by DetectType (DVD, Blu-ray, ...)
	windowSize     int       // bytes hashed at each sync point, clamped to [MinWindowSize, MaxWindowSize]
	index          *Index    // the index under construction
	useRawIndexing bool      // Force raw file indexing even for DVDs
	verboseWriter  io.Writer // Destination for diagnostic output (nil = disabled)
}
// NewIndexer creates a new Indexer for the given source directory using the
// default options (raw indexing disabled; see NewIndexerWithOptions).
func NewIndexer(sourceDir string, windowSize int) (*Indexer, error) {
	return NewIndexerWithOptions(sourceDir, windowSize, false)
}
// NewIndexerWithOptions creates a new Indexer with additional options.
// useRawIndexing forces raw file indexing even for DVDs (useful for finding
// content from any title/stream in the ISO). Out-of-bounds window sizes are
// silently clamped into [MinWindowSize, MaxWindowSize].
func NewIndexerWithOptions(sourceDir string, windowSize int, useRawIndexing bool) (*Indexer, error) {
	sourceType, err := DetectType(sourceDir)
	if err != nil {
		return nil, fmt.Errorf("detect source type: %w", err)
	}
	// Clamp the hash window into the supported range.
	windowSize = min(max(windowSize, MinWindowSize), MaxWindowSize)
	return &Indexer{
		sourceDir:      sourceDir,
		sourceType:     sourceType,
		windowSize:     windowSize,
		index:          NewIndex(sourceDir, sourceType, windowSize),
		useRawIndexing: useRawIndexing,
	}, nil
}
// SourceType returns the detected source type.
func (idx *Indexer) SourceType() Type {
	return idx.sourceType
}
// SetVerboseWriter sets the destination for diagnostic output during indexing.
// Pass nil to disable verbose output.
func (idx *Indexer) SetVerboseWriter(w io.Writer) {
	idx.verboseWriter = w
}
// SourceDir returns the source directory path.
func (idx *Indexer) SourceDir() string {
	return idx.sourceDir
}
// ProgressFunc is called during indexing to report progress.
// processed is the number of bytes processed so far, total is the total bytes to process.
// Implementations should be fast; it may be invoked frequently.
type ProgressFunc func(processed, total int64)
// Build scans all media files and builds the hash index.
// If progress is non-nil, it will be called periodically to report progress.
//
// Indexing strategy by source type:
//   - DVD (unless raw indexing forced): MPEG-PS ES-based indexing
//   - Blu-ray ISO files: per-M2TS-region indexing (may add several file entries)
//   - Blu-ray directories: M2TS ES-based indexing
//   - anything else: raw file indexing
func (idx *Indexer) Build(progress ProgressFunc) error {
	files, err := EnumerateMediaFiles(idx.sourceDir, idx.sourceType)
	if err != nil {
		return fmt.Errorf("enumerate media files: %w", err)
	}
	if len(files) == 0 {
		return fmt.Errorf("no media files found in %s", idx.sourceDir)
	}
	// Calculate total size for progress reporting
	var totalSize int64
	for _, relPath := range files {
		fullPath := filepath.Join(idx.sourceDir, relPath)
		size, err := GetFileInfo(fullPath)
		if err != nil {
			return fmt.Errorf("get file info for %s: %w", relPath, err)
		}
		totalSize += size
	}
	// Pre-allocate hash map to reduce reallocation
	// Estimate: ~1 sync point per 2KB of data on average
	estimatedSyncPoints := int(totalSize / 2048)
	if estimatedSyncPoints < 10000 {
		estimatedSyncPoints = 10000
	}
	idx.index.HashToLocations = make(map[uint64][]Location, estimatedSyncPoints)
	// For DVDs (MPEG-PS) and Blu-rays (MPEG-TS), use ES-based indexing
	// so the matcher works with continuous ES data.
	// Raw indexing is available as fallback for DVDs.
	if idx.sourceType == TypeDVD && !idx.useRawIndexing {
		idx.index.UsesESOffsets = true
	} else if idx.sourceType == TypeBluray {
		idx.index.UsesESOffsets = true
	}
	var processedSize int64
	// Process each file
	// fileIndex tracks the next available index for source file entries.
	// Most files produce one entry, but Blu-ray ISOs produce one per M2TS region.
	// NOTE(review): fileIndex is narrowed to uint16 below; a source with more
	// than 65535 entries would wrap — presumably unreachable, but worth confirming.
	fileIndex := 0
	for _, relPath := range files {
		fullPath := filepath.Join(idx.sourceDir, relPath)
		size, err := GetFileInfo(fullPath)
		if err != nil {
			return fmt.Errorf("get file info for %s: %w", relPath, err)
		}
		var checksum uint64
		if idx.sourceType == TypeDVD && !idx.useRawIndexing {
			checksum, err = idx.indexMPEGPSFile(uint16(fileIndex), fullPath, size, func(fileProcessed int64) {
				if progress != nil {
					progress(processedSize+fileProcessed, totalSize)
				}
			})
		} else if idx.sourceType == TypeBluray && isISOFile(relPath) {
			// Blu-ray ISO: one ISO may contain multiple M2TS regions,
			// each producing a separate source file entry.
			var n int
			n, _, err = idx.indexBlurayISOFile(uint16(fileIndex), fullPath, relPath, size, func(fileProcessed int64) {
				if progress != nil {
					progress(processedSize+fileProcessed, totalSize)
				}
			})
			if err != nil {
				return fmt.Errorf("index file %s: %w", relPath, err)
			}
			// indexBlurayISOFile already added source file entries
			fileIndex += n
			processedSize += size
			continue
		} else if idx.sourceType == TypeBluray {
			checksum, err = idx.indexM2TSFile(uint16(fileIndex), fullPath, size, func(fileProcessed int64) {
				if progress != nil {
					progress(processedSize+fileProcessed, totalSize)
				}
			})
		} else {
			checksum, err = idx.indexRawFile(uint16(fileIndex), fullPath, size, func(fileProcessed int64) {
				if progress != nil {
					progress(processedSize+fileProcessed, totalSize)
				}
			})
		}
		if err != nil {
			return fmt.Errorf("index file %s: %w", relPath, err)
		}
		idx.index.Files = append(idx.index.Files, File{
			RelativePath: relPath,
			Size:         size,
			Checksum:     checksum,
		})
		fileIndex++
		processedSize += size
	}
	return nil
}
// isISOFile reports whether the path names an ISO image, matching the
// ".iso" extension case-insensitively.
func isISOFile(path string) bool {
	const isoExt = ".iso"
	lowered := strings.ToLower(path)
	return strings.HasSuffix(lowered, isoExt)
}
// checksumWithProgress computes the xxhash checksum of data, feeding it to
// the hasher in fixed-size chunks. After each chunk, progress (if non-nil)
// receives the cumulative number of bytes consumed so far.
func checksumWithProgress(data []byte, progress func(int64)) uint64 {
	const step = 16 << 20 // 16MB per chunk keeps progress callbacks frequent
	h := xxhash.New()
	for start := 0; start < len(data); start += step {
		stop := min(start+step, len(data))
		h.Write(data[start:stop])
		if progress != nil {
			progress(int64(stop))
		}
	}
	return h.Sum64()
}
// indexMPEGPSFile processes an MPEG-PS file (DVD ISO) using ES-aware indexing.
// It extracts the elementary stream data and indexes sync points within it.
//
// The work is split into three progress phases of roughly equal weight:
// parse (0-33%), checksum (33-66%), and ES indexing (66-100%). The returned
// uint64 is the xxhash checksum of the whole file.
func (idx *Indexer) indexMPEGPSFile(fileIndex uint16, path string, size int64, progress func(int64)) (uint64, error) {
	// Memory-map the file with zero-copy access
	mmapFile, err := mmap.Open(path)
	if err != nil {
		return 0, fmt.Errorf("mmap open: %w", err)
	}
	// Note: Don't close mmapFile - it's stored in MmapFiles for later use
	// Store the mmap file for cleanup
	idx.index.MmapFiles = append(idx.index.MmapFiles, mmapFile)
	// Parse MPEG-PS structure with progress reporting using zero-copy data
	parser := NewMPEGPSParser(mmapFile.Data())
	// Phase 1: Parse MPEG-PS structure (0% → 33%)
	if err := parser.ParseWithProgress(func(processed, total int64) {
		if progress != nil {
			progress(processed / 3)
		}
	}); err != nil {
		return 0, fmt.Errorf("parse MPEG-PS: %w", err)
	}
	// Store parser for later use by matcher
	idx.index.ESReaders = append(idx.index.ESReaders, parser)
	// Phase 2: Checksum (33% → 66%)
	checksum := checksumWithProgress(mmapFile.Data(), func(processed int64) {
		if progress != nil {
			progress(size/3 + processed/3)
		}
	})
	// Phase 3: Index ES data (66% → 100%)
	videoESSize := parser.TotalESSize(true)
	if videoESSize > 0 {
		indexProgress := func(fileOffset int64) {
			if progress != nil {
				progress(2*size/3 + fileOffset/3)
			}
		}
		if err := idx.indexESData(fileIndex, parser, true, videoESSize, indexProgress); err != nil {
			return 0, fmt.Errorf("index video ES: %w", err)
		}
	}
	// Index each audio sub-stream separately
	audioSubStreams := parser.AudioSubStreams()
	for _, subStreamID := range audioSubStreams {
		subStreamSize := parser.AudioSubStreamESSize(subStreamID)
		if subStreamSize > 0 {
			if parser.IsLPCMSubStream(subStreamID) {
				// LPCM has no natural sync patterns; use fixed-interval sync points.
				// The indexer forces the slow path (ReadAudioSubStreamData) for LPCM
				// so the data goes through the byte-swap transform.
				if err := idx.indexSubStream(fileIndex, parser, subStreamID, subStreamSize, FindLPCMIndexSyncPoints); err != nil {
					return 0, fmt.Errorf("index LPCM sub-stream 0x%02X: %w", subStreamID, err)
				}
			} else {
				if err := idx.indexAudioSubStream(fileIndex, parser, subStreamID, subStreamSize); err != nil {
					return 0, fmt.Errorf("index audio sub-stream 0x%02X: %w", subStreamID, err)
				}
			}
		}
	}
	// Signal completion regardless of how phase arithmetic rounded.
	if progress != nil {
		progress(size)
	}
	return checksum, nil
}
// Index returns the built index. Must call Build first; before Build the
// returned index is whatever the Indexer was constructed with.
func (idx *Indexer) Index() *Index {
	return idx.index
}
package source
import (
"fmt"
"github.com/stuckj/mkvdup/internal/mmap"
"golang.org/x/sys/unix"
)
// indexM2TSFile processes a Blu-ray M2TS file using ES-aware indexing.
// It parses the MPEG-TS structure to extract elementary stream data and
// indexes sync points within the continuous ES, matching what MKV files contain.
//
// Mirrors indexMPEGPSFile's three progress phases: parse (0-33%),
// checksum (33-66%), ES indexing (66-100%). Returns the whole-file checksum.
func (idx *Indexer) indexM2TSFile(fileIndex uint16, path string, size int64, progress func(int64)) (uint64, error) {
	mmapFile, err := mmap.Open(path)
	if err != nil {
		return 0, fmt.Errorf("mmap open: %w", err)
	}
	// Note: Don't close mmapFile - it's stored in MmapFiles for later use
	idx.index.MmapFiles = append(idx.index.MmapFiles, mmapFile)
	mmapFile.Advise(unix.MADV_SEQUENTIAL)
	// Phase 1: Parse MPEG-TS structure (0% → 33%)
	parser := NewMPEGTSParser(mmapFile.Data())
	if err := parser.ParseWithProgress(func(processed, total int64) {
		if progress != nil {
			progress(processed / 3)
		}
	}); err != nil {
		return 0, fmt.Errorf("parse MPEG-TS: %w", err)
	}
	// Store parser for later use by matcher
	idx.index.ESReaders = append(idx.index.ESReaders, parser)
	// Phase 2: Checksum (33% → 66%)
	checksum := checksumWithProgress(mmapFile.Data(), func(processed int64) {
		if progress != nil {
			progress(size/3 + processed/3)
		}
	})
	// Phase 3: Index ES data (66% → 100%)
	videoESSize := parser.TotalESSize(true)
	if videoESSize > 0 {
		indexProgress := func(fileOffset int64) {
			if progress != nil {
				progress(2*size/3 + fileOffset/3)
			}
		}
		if err := idx.indexESData(fileIndex, parser, true, videoESSize, indexProgress); err != nil {
			return 0, fmt.Errorf("index video ES: %w", err)
		}
	}
	// Index each audio sub-stream separately.
	// Subtitle sub-streams are excluded here and indexed afterwards with
	// PGS-specific sync point detection.
	subtitleIDs := parser.SubtitleSubStreams()
	subtitleSet := make(map[byte]bool, len(subtitleIDs))
	for _, id := range subtitleIDs {
		subtitleSet[id] = true
	}
	for _, subStreamID := range parser.AudioSubStreams() {
		if subtitleSet[subStreamID] {
			continue // indexed below with subtitle-specific sync points
		}
		subStreamSize := parser.AudioSubStreamESSize(subStreamID)
		if subStreamSize > 0 {
			if err := idx.indexAudioSubStream(fileIndex, parser, subStreamID, subStreamSize); err != nil {
				return 0, fmt.Errorf("index audio sub-stream %d: %w", subStreamID, err)
			}
		}
	}
	// Index subtitle sub-streams with PGS sync point detection
	for _, subStreamID := range subtitleIDs {
		subStreamSize := parser.AudioSubStreamESSize(subStreamID)
		if subStreamSize > 0 {
			if err := idx.indexSubStream(fileIndex, parser, subStreamID, subStreamSize, FindPGSSyncPoints); err != nil {
				return 0, fmt.Errorf("index subtitle sub-stream %d: %w", subStreamID, err)
			}
		}
	}
	// Signal completion regardless of how phase arithmetic rounded.
	if progress != nil {
		progress(size)
	}
	return checksum, nil
}
// indexBlurayISOFile processes a Blu-ray ISO file by finding M2TS regions
// within the ISO9660 filesystem and indexing each as a separate source file entry.
// Returns the number of source file entries created and the ISO checksum.
//
// Unlike the other index* helpers, this one appends its own File entries
// (one per successfully parsed M2TS region); the caller must NOT add another.
// Unparseable or out-of-bounds regions are skipped (logged to verboseWriter),
// but if every region is skipped an error is returned.
func (idx *Indexer) indexBlurayISOFile(startFileIndex uint16, path, relPath string, size int64, progress func(int64)) (int, uint64, error) {
	// Find M2TS file extents within the ISO
	m2tsFiles, err := findBlurayM2TSInISO(path)
	if err != nil {
		return 0, 0, fmt.Errorf("find M2TS in ISO: %w", err)
	}
	if len(m2tsFiles) == 0 {
		return 0, 0, fmt.Errorf("no M2TS files found in Blu-ray ISO")
	}
	// Memory-map the entire ISO
	mmapFile, err := mmap.Open(path)
	if err != nil {
		return 0, 0, fmt.Errorf("mmap open: %w", err)
	}
	// Don't close — stored in MmapFiles for later use
	idx.index.MmapFiles = append(idx.index.MmapFiles, mmapFile)
	mmapFile.Advise(unix.MADV_SEQUENTIAL)
	isoData := mmapFile.Data()
	// Phase 1: Parse all M2TS regions (0% → 33%)
	type parsedM2TS struct {
		adapter *isoM2TSAdapter
		extent  isoFileExtent
	}
	var parsed []parsedM2TS
	for _, m2ts := range m2tsFiles {
		var adapter *isoM2TSAdapter
		if m2ts.Extents != nil {
			// Multi-extent UDF file: create virtual contiguous view
			// over the existing mmap sub-slices (zero-copy, no heap allocation)
			mr := newMultiRegionData(m2ts.Extents, isoData)
			parser := NewMPEGTSParserMultiRegion(mr)
			if err := parser.ParseWithProgress(nil); err != nil {
				if idx.verboseWriter != nil {
					fmt.Fprintf(idx.verboseWriter, "  [indexBlurayISO] skipping %s: %v\n", m2ts.Name, err)
				}
				continue
			}
			adapter = newISOAdapterMultiExtent(parser, mr, m2ts.Extents)
		} else {
			// Contiguous file: use sub-slice of mmap'd ISO
			endOffset := m2ts.Offset + m2ts.Size
			if endOffset > int64(len(isoData)) {
				// Extent claims bytes past the end of the image; skip it
				// rather than slicing out of range.
				if idx.verboseWriter != nil {
					fmt.Fprintf(idx.verboseWriter, "  [indexBlurayISO] skipping %s: extent beyond ISO bounds (%d + %d > %d)\n",
						m2ts.Name, m2ts.Offset, m2ts.Size, len(isoData))
				}
				continue
			}
			m2tsData := isoData[m2ts.Offset:endOffset]
			parser := NewMPEGTSParser(m2tsData)
			if err := parser.ParseWithProgress(nil); err != nil {
				if idx.verboseWriter != nil {
					fmt.Fprintf(idx.verboseWriter, "  [indexBlurayISO] skipping %s: %v\n", m2ts.Name, err)
				}
				continue
			}
			adapter = newISOAdapter(parser, isoData, m2ts.Offset)
		}
		parsed = append(parsed, parsedM2TS{adapter: adapter, extent: m2ts})
	}
	if len(parsed) == 0 {
		return 0, 0, fmt.Errorf("no valid M2TS streams found in Blu-ray ISO")
	}
	if progress != nil {
		progress(size / 3)
	}
	// Phase 2: Checksum the full ISO (33% → 66%)
	checksum := checksumWithProgress(isoData, func(processed int64) {
		if progress != nil {
			progress(size/3 + processed/3)
		}
	})
	// Phase 3: Index ES data from all M2TS regions (66% → 100%)
	entriesCreated := 0
	for _, p := range parsed {
		// Each region gets its own consecutive file index.
		fileIndex := startFileIndex + uint16(entriesCreated)
		adapter := p.adapter
		// Store adapter as ESReader for this source file entry
		idx.index.ESReaders = append(idx.index.ESReaders, adapter)
		// Index video ES
		videoESSize := adapter.TotalESSize(true)
		if videoESSize > 0 {
			if err := idx.indexESData(fileIndex, adapter, true, videoESSize, nil); err != nil {
				return 0, 0, fmt.Errorf("index video ES for %s: %w", p.extent.Name, err)
			}
		}
		// Index audio sub-streams (subtitles handled separately below)
		subtitleIDs := adapter.parser.SubtitleSubStreams()
		subtitleSet := make(map[byte]bool, len(subtitleIDs))
		for _, id := range subtitleIDs {
			subtitleSet[id] = true
		}
		for _, subStreamID := range adapter.AudioSubStreams() {
			if subtitleSet[subStreamID] {
				continue
			}
			subStreamSize := adapter.AudioSubStreamESSize(subStreamID)
			if subStreamSize > 0 {
				if err := idx.indexAudioSubStream(fileIndex, adapter, subStreamID, subStreamSize); err != nil {
					return 0, 0, fmt.Errorf("index audio sub-stream %d for %s: %w", subStreamID, p.extent.Name, err)
				}
			}
		}
		// Index subtitle sub-streams with PGS sync point detection
		for _, subStreamID := range subtitleIDs {
			subStreamSize := adapter.AudioSubStreamESSize(subStreamID)
			if subStreamSize > 0 {
				if err := idx.indexSubStream(fileIndex, adapter, subStreamID, subStreamSize, FindPGSSyncPoints); err != nil {
					return 0, 0, fmt.Errorf("index subtitle sub-stream %d for %s: %w", subStreamID, p.extent.Name, err)
				}
			}
		}
		// Add source file entry — all entries share the same ISO path, size, checksum
		idx.index.Files = append(idx.index.Files, File{
			RelativePath: relPath,
			Size:         size,
			Checksum:     checksum,
		})
		entriesCreated++
	}
	if progress != nil {
		progress(size)
	}
	return entriesCreated, checksum, nil
}
package source
import (
"fmt"
"github.com/cespare/xxhash/v2"
)
// esDataProvider is the interface needed by indexESData and indexAudioSubStream.
// Both MPEGPSParser and MPEGTSParser implement this, as well as isoM2TSAdapter.
type esDataProvider interface {
	// Data returns the full backing buffer (may be nil for adapters that
	// have no single contiguous view — see isoM2TSAdapter.Data).
	Data() []byte
	// DataSlice returns size bytes starting at parser-relative offset off.
	DataSlice(off int64, size int) []byte
	// DataSize reports the total backing data size, used for bounds checks.
	DataSize() int64
	// FilteredVideoRanges returns the PES payload ranges carrying video ES data.
	FilteredVideoRanges() []PESPayloadRange
	// FilteredAudioRanges returns the PES payload ranges for one audio sub-stream.
	FilteredAudioRanges(subStreamID byte) []PESPayloadRange
	// ReadESData reads size bytes of continuous ES data starting at esOffset;
	// used when a read spans payload-range boundaries (may copy).
	ReadESData(esOffset int64, size int, isVideo bool) ([]byte, error)
	// ReadAudioSubStreamData reads continuous ES data for one audio sub-stream.
	// For LPCM sub-streams this path applies the byte-swap transform.
	ReadAudioSubStreamData(subStreamID byte, esOffset int64, size int) ([]byte, error)
	// IsLPCMSubStream reports whether the given sub-stream carries LPCM audio.
	IsLPCMSubStream(subStreamID byte) bool
}
// indexESData indexes the elementary stream data from an ES-aware parser.
// Uses zero-copy iteration through PES payload ranges.
//
// For each sync point found inside a payload range, a windowSize-byte window
// starting at the sync point is hashed into HashToLocations. Windows fully
// inside one range hash directly off the mmap (fast path); windows crossing a
// range boundary go through ReadESData (slow path, may copy).
//
// NOTE(review): the isVideo parameter is stored in the Location entries and
// passed to ReadESData, but ranges are always taken from FilteredVideoRanges —
// calling this with isVideo=false would still index video ranges. All visible
// call sites pass true; confirm before reusing for audio.
func (idx *Indexer) indexESData(fileIndex uint16, parser esDataProvider, isVideo bool, esSize int64, progress func(int64)) error {
	ranges := parser.FilteredVideoRanges()
	if len(ranges) == 0 {
		return nil
	}
	dataSize := parser.DataSize()
	syncPointCount := 0
	var indexFastPath, indexSlowPath, indexSkipped int
	// Iterate through each PES payload range (zero-copy when within one region)
	for rangeIdx, r := range ranges {
		endOffset := r.FileOffset + int64(r.Size)
		if endOffset > dataSize {
			// Range claims bytes past the backing data; skip defensively.
			continue
		}
		rangeData := parser.DataSlice(r.FileOffset, r.Size)
		// Find NAL unit start positions (byte after 00 00 01)
		// Hashing from NAL header enables matching both Annex B and AVCC formats
		syncPoints := FindVideoNALStarts(rangeData)
		// Add each sync point to the index
		for _, offsetInRange := range syncPoints {
			syncESOffset := r.ESOffset + int64(offsetInRange)
			// Ensure we have enough data for the window
			if syncESOffset+int64(idx.windowSize) > esSize {
				continue
			}
			// Check if window fits within this range (zero-copy fast path)
			if offsetInRange+idx.windowSize <= len(rangeData) {
				window := rangeData[offsetInRange : offsetInRange+idx.windowSize]
				hash := xxhash.Sum64(window)
				idx.index.HashToLocations[hash] = append(idx.index.HashToLocations[hash], Location{
					FileIndex: fileIndex,
					Offset:    syncESOffset,
					IsVideo:   isVideo,
				})
				syncPointCount++
				indexFastPath++
			} else {
				// Window spans range boundary - use ReadESData (may copy)
				window, err := parser.ReadESData(syncESOffset, idx.windowSize, isVideo)
				if err != nil || len(window) < idx.windowSize {
					// Short or failed cross-range read: drop this sync point.
					indexSkipped++
					continue
				}
				hash := xxhash.Sum64(window)
				idx.index.HashToLocations[hash] = append(idx.index.HashToLocations[hash], Location{
					FileIndex: fileIndex,
					Offset:    syncESOffset,
					IsVideo:   isVideo,
				})
				syncPointCount++
				indexSlowPath++
			}
		}
		// Report progress periodically (every 10000 ranges, plus range 0)
		if rangeIdx%10000 == 0 && progress != nil {
			progress(r.FileOffset)
		}
	}
	if idx.verboseWriter != nil {
		fmt.Fprintf(idx.verboseWriter, "  [indexESData] video=%v: %d NALs indexed (fast=%d, slow/cross-range=%d, skipped=%d)\n",
			isVideo, syncPointCount, indexFastPath, indexSlowPath, indexSkipped)
	}
	return nil
}
// syncPointFinder is a function that returns sync point offsets within data.
// Offsets are relative to the start of the data slice it is given.
type syncPointFinder func(data []byte) []int

// indexAudioSubStream indexes a specific audio sub-stream using the generic
// audio sync point finder. Thin wrapper over indexSubStream.
func (idx *Indexer) indexAudioSubStream(fileIndex uint16, parser esDataProvider, subStreamID byte, esSize int64) error {
	return idx.indexSubStream(fileIndex, parser, subStreamID, esSize, FindAudioSyncPoints)
}
// indexSubStream indexes a specific sub-stream using the provided sync point finder.
// Uses zero-copy iteration through PES payload ranges.
// For LPCM sub-streams, always uses the slow path (ReadAudioSubStreamData) because
// the raw data is big-endian but the read method returns byte-swapped little-endian data.
//
// Mirrors indexESData's structure: fast path hashes a window directly from the
// payload range; the slow path (cross-range windows, or any LPCM window) reads
// via ReadAudioSubStreamData. Windows that cannot be fully read are dropped.
func (idx *Indexer) indexSubStream(fileIndex uint16, parser esDataProvider, subStreamID byte, esSize int64, findSyncPoints syncPointFinder) error {
	ranges := parser.FilteredAudioRanges(subStreamID)
	if len(ranges) == 0 {
		return nil
	}
	dataSize := parser.DataSize()
	isLPCM := parser.IsLPCMSubStream(subStreamID)
	// Iterate through each PES payload range (zero-copy when within one region)
	for _, r := range ranges {
		endOffset := r.FileOffset + int64(r.Size)
		if endOffset > dataSize {
			// Range claims bytes past the backing data; skip defensively.
			continue
		}
		rangeData := parser.DataSlice(r.FileOffset, r.Size)
		// Find sync points in this range (uses raw data — LPCM sync points
		// are fixed-interval so data content doesn't matter)
		syncPoints := findSyncPoints(rangeData)
		// Add each sync point to the index
		for _, offsetInRange := range syncPoints {
			syncESOffset := r.ESOffset + int64(offsetInRange)
			// Ensure we have enough data for the window
			if syncESOffset+int64(idx.windowSize) > esSize {
				continue
			}
			// For LPCM, always use ReadAudioSubStreamData which applies the transform.
			// For non-LPCM, use the zero-copy fast path when possible.
			if !isLPCM && offsetInRange+idx.windowSize <= len(rangeData) {
				window := rangeData[offsetInRange : offsetInRange+idx.windowSize]
				hash := xxhash.Sum64(window)
				idx.index.HashToLocations[hash] = append(idx.index.HashToLocations[hash], Location{
					FileIndex:        fileIndex,
					Offset:           syncESOffset,
					IsVideo:          false,
					AudioSubStreamID: subStreamID,
				})
			} else {
				// Window spans range boundary or LPCM - use ReadAudioSubStreamData
				window, err := parser.ReadAudioSubStreamData(subStreamID, syncESOffset, idx.windowSize)
				if err != nil || len(window) < idx.windowSize {
					// Short or failed read: drop this sync point.
					continue
				}
				hash := xxhash.Sum64(window)
				idx.index.HashToLocations[hash] = append(idx.index.HashToLocations[hash], Location{
					FileIndex:        fileIndex,
					Offset:           syncESOffset,
					IsVideo:          false,
					AudioSubStreamID: subStreamID,
				})
			}
		}
	}
	return nil
}
package source
import (
	"fmt"
	"io"

	"github.com/cespare/xxhash/v2"
	"github.com/stuckj/mkvdup/internal/mmap"
	"golang.org/x/sys/unix"
)
// mmapRawReader wraps mmap.File to implement RawReader interface.
// It offers both copying access (ReadAt) and zero-copy access (Slice)
// to the memory-mapped file contents.
type mmapRawReader struct {
	mmapFile *mmap.File // underlying mapping; released via Close
}
// ReadAt copies up to len(buf) bytes from the mapped file starting at offset,
// implementing io.ReaderAt. Returns an error if the offset is out of range.
//
// Fix: the io.ReaderAt contract requires a non-nil error whenever fewer than
// len(buf) bytes are returned; the previous version returned a short count
// with a nil error when the mapping yielded a truncated slice.
func (r *mmapRawReader) ReadAt(buf []byte, offset int64) (int, error) {
	data := r.mmapFile.Slice(offset, len(buf))
	if data == nil {
		return 0, fmt.Errorf("offset out of range")
	}
	n := copy(buf, data)
	if n < len(buf) {
		// Short read at end of mapping: io.ReaderAt requires a non-nil error.
		return n, io.EOF
	}
	return n, nil
}
// Slice returns a zero-copy slice of the underlying mmap'd data.
// A nil result indicates the requested range is out of bounds (see ReadAt).
func (r *mmapRawReader) Slice(offset int64, size int) []byte {
	return r.mmapFile.Slice(offset, size)
}

// Len returns the length in bytes of the mapped data.
func (r *mmapRawReader) Len() int {
	return r.mmapFile.Len()
}

// Close releases the underlying memory mapping.
func (r *mmapRawReader) Close() error {
	return r.mmapFile.Close()
}
// indexRawFile processes a raw file (for non-DVD, non-Blu-ray formats).
// Processes the file in a single pass: computes checksum and indexes sync points
// together in chunks, releasing mmap pages as they're processed.
//
// The mapping is kept open and registered in RawReaders so the matcher can
// read from it later; it must not be closed here.
func (idx *Indexer) indexRawFile(fileIndex uint16, path string, size int64, progress func(int64)) (uint64, error) {
	mmapFile, err := mmap.Open(path)
	if err != nil {
		return 0, fmt.Errorf("mmap open: %w", err)
	}
	idx.index.RawReaders = append(idx.index.RawReaders, &mmapRawReader{mmapFile: mmapFile})
	mmapFile.Advise(unix.MADV_SEQUENTIAL)
	data := mmapFile.Data()
	return idx.indexRawFileData(fileIndex, mmapFile, data, size, progress)
}
// indexRawFileData is the core of indexRawFile operating on already-opened mmap data.
// Used as a fallback when M2TS packet structure cannot be detected.
//
// Walks the data in 64MB chunks with a 3-byte overlap so sync patterns that
// straddle a chunk boundary are still detected. checksumPos tracks how far the
// hasher has consumed so overlapped bytes are never hashed twice. Pages behind
// the current chunk are released with MADV_DONTNEED to bound resident memory.
func (idx *Indexer) indexRawFileData(fileIndex uint16, mmapFile *mmap.File, data []byte, size int64, progress func(int64)) (uint64, error) {
	hasher := xxhash.New()
	const chunkSize = 64 * 1024 * 1024
	// Overlap covers a sync pattern split across the chunk boundary.
	const overlap = 3
	pageSize := unix.Getpagesize()
	checksumPos := 0
	for chunkStart := 0; chunkStart < len(data); {
		chunkEnd := chunkStart + chunkSize
		if chunkEnd > len(data) {
			chunkEnd = len(data)
		}
		chunk := data[chunkStart:chunkEnd]
		// Hash only the bytes not already consumed (overlap region excluded).
		if chunkEnd > checksumPos {
			hasher.Write(data[checksumPos:chunkEnd])
			checksumPos = chunkEnd
		}
		// Offsets returned are absolute (chunkStart is passed as the base).
		videoOffsets := FindVideoNALStartsInRange(chunk, chunkStart)
		audioOffsets := FindAudioSyncPointsInRange(chunk, chunkStart)
		for _, offset := range videoOffsets {
			// Skip sync points whose hash window would run past EOF.
			if offset+idx.windowSize > len(data) {
				continue
			}
			window := data[offset : offset+idx.windowSize]
			hash := xxhash.Sum64(window)
			idx.index.HashToLocations[hash] = append(idx.index.HashToLocations[hash], Location{
				FileIndex: fileIndex,
				Offset:    int64(offset),
			})
		}
		for _, offset := range audioOffsets {
			if offset+idx.windowSize > len(data) {
				continue
			}
			window := data[offset : offset+idx.windowSize]
			hash := xxhash.Sum64(window)
			idx.index.HashToLocations[hash] = append(idx.index.HashToLocations[hash], Location{
				FileIndex: fileIndex,
				Offset:    int64(offset),
			})
		}
		if progress != nil {
			progress(int64(chunkEnd))
		}
		// Release fully-processed pages (page-aligned) to cap memory use.
		releaseUpTo := (chunkStart / pageSize) * pageSize
		if releaseUpTo > 0 {
			unix.Madvise(data[:releaseUpTo], unix.MADV_DONTNEED)
		}
		if chunkEnd >= len(data) {
			break
		}
		chunkStart = chunkEnd - overlap
	}
	checksum := hasher.Sum64()
	// Later access by the matcher is random, not sequential.
	mmapFile.Advise(unix.MADV_RANDOM)
	return checksum, nil
}
package source
import (
"errors"
"fmt"
"io"
"os"
"strings"
)
// errNotISO9660 is returned when the image lacks a valid ISO9660 PVD,
// signaling the caller to try an alternative filesystem (e.g. UDF).
var errNotISO9660 = errors.New("not an ISO9660 image")

// isoSectorSize is the ISO9660 logical sector size in bytes.
const isoSectorSize = 2048

// isoFileExtent represents a file within an ISO9660 filesystem.
type isoFileExtent struct {
	Name    string             // filename (uppercase, no version suffix)
	Offset  int64              // byte offset in ISO (first extent)
	Size    int64              // data length in bytes
	IsDir   bool               // true if this is a directory entry
	Extents []isoPhysicalRange // non-nil for multi-extent UDF files
}

// isoPhysicalRange describes one contiguous physical region within an ISO.
type isoPhysicalRange struct {
	ISOOffset int64 // byte offset in the ISO file
	Length    int64 // number of bytes
}
// findBlurayM2TSInISO finds M2TS files under BDMV/STREAM/ in a Blu-ray ISO.
// Tries UDF first (native Blu-ray filesystem), falls back to ISO9660.
// UDF is preferred because ISO9660 has a 4 GB file size limit and cannot
// properly represent large M2TS files common on Blu-ray discs.
func findBlurayM2TSInISO(isoPath string) ([]isoFileExtent, error) {
	f, err := os.Open(isoPath)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	// Try UDF first — Blu-ray's native filesystem, no file size limits.
	udfFiles, udfErr := findBlurayM2TSInUDF(f)
	if udfErr == nil && len(udfFiles) > 0 {
		return udfFiles, nil
	}
	// Fall back to ISO9660 (some DVD-based ISOs or hybrid discs).
	rootExtent, rootDataLen, err := readISOPVDRoot(f)
	if err != nil {
		if errors.Is(err, errNotISO9660) {
			// No ISO9660 PVD found — report both failures if UDF also failed.
			if udfErr != nil {
				return nil, fmt.Errorf("neither UDF (%v) nor ISO9660 (%w) found", udfErr, err)
			}
			return nil, fmt.Errorf("read ISO PVD: %w", err)
		}
		// ISO9660 PVD exists but had a read/parse error — surface it directly.
		if udfErr != nil {
			return nil, fmt.Errorf("read ISO PVD: %w (UDF attempt also failed: %v)", err, udfErr)
		}
		return nil, fmt.Errorf("read ISO PVD: %w", err)
	}
	// Navigate: root → BDMV → STREAM
	rootEntries, err := readISODirectory(f, rootExtent, rootDataLen)
	if err != nil {
		return nil, fmt.Errorf("read ISO root directory: %w", err)
	}
	bdmv, err := findISOEntry(rootEntries, "BDMV")
	if err != nil {
		return nil, fmt.Errorf("find BDMV directory: %w", err)
	}
	// Directory entries store byte offsets; readISODirectory wants sector LBAs.
	bdmvEntries, err := readISODirectory(f, uint32(bdmv.Offset/isoSectorSize), uint32(bdmv.Size))
	if err != nil {
		return nil, fmt.Errorf("read BDMV directory: %w", err)
	}
	stream, err := findISOEntry(bdmvEntries, "STREAM")
	if err != nil {
		return nil, fmt.Errorf("find STREAM directory: %w", err)
	}
	streamEntries, err := readISODirectory(f, uint32(stream.Offset/isoSectorSize), uint32(stream.Size))
	if err != nil {
		return nil, fmt.Errorf("read STREAM directory: %w", err)
	}
	// Collect M2TS files (names are already uppercased by readISODirectory)
	var m2tsFiles []isoFileExtent
	for _, e := range streamEntries {
		if !e.IsDir && strings.HasSuffix(e.Name, ".M2TS") {
			m2tsFiles = append(m2tsFiles, e)
		}
	}
	return m2tsFiles, nil
}
// readISOPVDRoot reads the Primary Volume Descriptor and returns the root
// directory extent LBA and data length. The PVD lives at sector 16 and must
// carry descriptor type 1 with the "CD001" standard identifier; anything else
// yields errNotISO9660 so the caller can try another filesystem.
func readISOPVDRoot(f *os.File) (extentLBA uint32, dataLen uint32, err error) {
	buf := make([]byte, isoSectorSize)
	if _, err := f.ReadAt(buf, 16*isoSectorSize); err != nil {
		return 0, 0, err
	}
	if buf[0] != 1 || string(buf[1:6]) != "CD001" {
		return 0, 0, fmt.Errorf("%w: invalid primary volume descriptor", errNotISO9660)
	}
	// The root directory record starts at byte 156 of the PVD and is at
	// least 34 bytes long.
	rec := buf[156:]
	if len(rec) < 34 {
		return 0, 0, fmt.Errorf("%w: root directory record too short", errNotISO9660)
	}
	// Little-endian halves of the both-endian fields: extent LBA at
	// record bytes 2-5, data length at bytes 10-13.
	le32 := func(b []byte) uint32 {
		return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
	}
	return le32(rec[2:]), le32(rec[10:]), nil
}
// readISODirectory reads and parses an ISO9660 directory at the given extent.
//
// Each directory record starts with its own length byte; a zero length means
// padding to the next sector boundary. Names are normalized to uppercase with
// the ";1" version suffix and any trailing dot stripped. Malformed records are
// skipped rather than aborting the whole directory.
func readISODirectory(f *os.File, extentLBA, dataLen uint32) ([]isoFileExtent, error) {
	// Cap directory read to 256KB to avoid huge allocations
	if dataLen > 256*1024 {
		dataLen = 256 * 1024
	}
	dirData := make([]byte, dataLen)
	if _, err := f.ReadAt(dirData, int64(extentLBA)*isoSectorSize); err != nil {
		return nil, err
	}
	var entries []isoFileExtent
	offset := 0
	for offset < len(dirData) {
		recLen := int(dirData[offset])
		if recLen == 0 {
			// Padding at end of sector — skip to next sector boundary
			nextSector := ((offset / isoSectorSize) + 1) * isoSectorSize
			if nextSector >= len(dirData) {
				break
			}
			offset = nextSector
			continue
		}
		// Truncated record at the end of the buffer: stop parsing.
		if offset+recLen > len(dirData) {
			break
		}
		// Need at least up to the name-length byte (offset+32).
		if offset+33 > len(dirData) {
			break
		}
		nameLen := int(dirData[offset+32])
		if nameLen == 0 || offset+33+nameLen > len(dirData) {
			offset += recLen
			continue
		}
		name := string(dirData[offset+33 : offset+33+nameLen])
		// Skip "." and ".." entries (single byte 0x00 or 0x01)
		if nameLen == 1 && (name[0] == 0x00 || name[0] == 0x01) {
			offset += recLen
			continue
		}
		// Normalize: uppercase, strip version (";1") and trailing dot
		name = strings.ToUpper(name)
		if idx := strings.Index(name, ";"); idx >= 0 {
			name = name[:idx]
		}
		name = strings.TrimSuffix(name, ".")
		// Extract extent LBA (bytes 2-5, little-endian)
		eLBA := uint32(dirData[offset+2]) | uint32(dirData[offset+3])<<8 |
			uint32(dirData[offset+4])<<16 | uint32(dirData[offset+5])<<24
		// Extract data length (bytes 10-13, little-endian)
		eLen := uint32(dirData[offset+10]) | uint32(dirData[offset+11])<<8 |
			uint32(dirData[offset+12])<<16 | uint32(dirData[offset+13])<<24
		// File flags byte 25: bit 1 = directory
		isDir := dirData[offset+25]&0x02 != 0
		entries = append(entries, isoFileExtent{
			Name:   name,
			Offset: int64(eLBA) * isoSectorSize, // convert LBA to byte offset
			Size:   int64(eLen),
			IsDir:  isDir,
		})
		offset += recLen
	}
	return entries, nil
}
// findISOEntry finds a named directory entry (case-insensitive).
// Entry names are already uppercase-normalized by readISODirectory, so the
// lookup uppercases the query and compares exactly. Returns a pointer into
// the entries slice, or an error if no entry matches.
func findISOEntry(entries []isoFileExtent, name string) (*isoFileExtent, error) {
	want := strings.ToUpper(name)
	for i, e := range entries {
		if e.Name == want {
			return &entries[i], nil
		}
	}
	return nil, fmt.Errorf("%q not found", name)
}
// readISOFileExtent reads up to maxBytes from an isoFileExtent, handling both
// contiguous files (single ReadAt from Offset) and non-contiguous UDF files
// (stitching reads across Extents). Returns the data read and any error.
// io.EOF and io.ErrUnexpectedEOF are treated as non-fatal (partial read OK),
// but reading zero bytes is always an error.
//
// Fix: the contiguous path previously returned an empty slice with a nil
// error when ReadAt yielded zero bytes (e.g. offset past EOF), while the
// multi-extent path treated zero bytes as an error. Both paths now agree.
func readISOFileExtent(f *os.File, ext isoFileExtent, maxBytes int64) ([]byte, error) {
	readSize := min(ext.Size, maxBytes)
	if readSize <= 0 {
		return nil, fmt.Errorf("file %s has non-positive size %d", ext.Name, ext.Size)
	}
	data := make([]byte, readSize)
	if len(ext.Extents) == 0 {
		// Contiguous: single read from Offset.
		n, err := f.ReadAt(data, ext.Offset)
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			return nil, err
		}
		if n == 0 {
			// Consistent with the multi-extent path: no data is an error.
			return nil, fmt.Errorf("no data read from %s", ext.Name)
		}
		return data[:n], nil
	}
	// Non-contiguous: stitch reads across physical extents.
	var totalRead int
	for _, pe := range ext.Extents {
		if int64(totalRead) >= readSize {
			break
		}
		remaining := int(readSize) - totalRead
		chunkSize := min(int(pe.Length), remaining)
		if chunkSize <= 0 {
			continue
		}
		n, err := f.ReadAt(data[totalRead:totalRead+chunkSize], pe.ISOOffset)
		totalRead += n
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			if totalRead == 0 {
				return nil, err
			}
			break // keep what we have
		}
		if n < chunkSize {
			break // short read
		}
	}
	if totalRead == 0 {
		return nil, fmt.Errorf("no data read from %s extents", ext.Name)
	}
	return data[:totalRead], nil
}
package source
import "sort"
// isoM2TSAdapter wraps an MPEGTSParser to provide ISO-level integration
// for an M2TS region embedded within a Blu-ray ISO file. The parser operates
// on a sub-slice (contiguous) or virtual contiguous view (multi-extent) of
// the ISO data, producing FileOffset values relative to that view.
//
// The adapter handles two offset domains:
//   - Parser-relative: used by FilteredVideoRanges (zero-copy from parser),
//     DataSlice (adds baseOffset / resolves via multiRegionData internally),
//     and all ES-offset-based reads.
//   - ISO-relative: used by range maps stored in the dedup file. The
//     FileOffsetConverter method provides the conversion function, applied
//     lazily during range map encoding to avoid copying range arrays.
//
// Exactly one of the two layouts is active: mr == nil means contiguous
// (isoData + baseOffset are used); mr != nil means multi-extent
// (mr + extentMap are used).
type isoM2TSAdapter struct {
	parser     *MPEGTSParser
	isoData    []byte // full ISO mmap data (contiguous case: used by Data/DataSlice)
	baseOffset int64  // M2TS region start offset within the ISO
	// For non-contiguous multi-extent files:
	mr        *multiRegionData // virtual contiguous view over mmap sub-slices
	extentMap []extentMapEntry // maps logical offset → ISO offset
}

// extentMapEntry maps a range of logical (assembled) offsets to physical ISO offsets.
type extentMapEntry struct {
	LogicalStart int64 // start offset in assembled data
	ISOOffset    int64 // corresponding offset in the ISO file
	Length       int64 // length of this extent
}
// newISOAdapter creates an adapter for a contiguous M2TS region within an ISO.
// isoData is the full mmap'd ISO; baseOffset is where the region starts in it.
func newISOAdapter(parser *MPEGTSParser, isoData []byte, baseOffset int64) *isoM2TSAdapter {
	return &isoM2TSAdapter{
		parser:     parser,
		isoData:    isoData,
		baseOffset: baseOffset,
	}
}
// newISOAdapterMultiExtent creates an adapter for a non-contiguous M2TS region.
// mr provides a virtual contiguous view over the mmap sub-slices.
// extents describes the physical layout in the ISO; the extent map built here
// records, for each physical extent, where it begins in the assembled
// (logical) byte stream so logical offsets can later be mapped back to ISO offsets.
func newISOAdapterMultiExtent(parser *MPEGTSParser, mr *multiRegionData, extents []isoPhysicalRange) *isoM2TSAdapter {
	em := make([]extentMapEntry, 0, len(extents))
	var logical int64
	for _, ext := range extents {
		em = append(em, extentMapEntry{
			LogicalStart: logical,
			ISOOffset:    ext.ISOOffset,
			Length:       ext.Length,
		})
		logical += ext.Length
	}
	return &isoM2TSAdapter{
		parser:    parser,
		mr:        mr,
		extentMap: em,
	}
}
// --- esDataProvider interface (used by indexer) ---

// Data returns the backing data buffer. Contiguous regions expose the full
// ISO mmap; multi-extent regions have no single buffer, so callers get nil
// and must go through DataSlice instead.
func (a *isoM2TSAdapter) Data() []byte {
	if a.mr == nil {
		return a.isoData
	}
	return nil
}
// DataSlice returns a sub-slice of the backing data at the given offset and size.
// Offsets are parser-relative (from FilteredVideoRanges); the adapter performs
// the mapping to the underlying storage itself.
func (a *isoM2TSAdapter) DataSlice(off int64, size int) []byte {
	if a.mr != nil {
		// Multi-extent: parser-relative equals assembled-relative; the
		// multiRegionData view resolves it to the right mmap sub-slice.
		return a.mr.Slice(off, off+int64(size))
	}
	// Contiguous: shift into the ISO by the region's base offset.
	start := off + a.baseOffset
	return a.isoData[start : start+int64(size)]
}
// DataSize returns the parser's data size (for bounds checking parser-relative offsets).
func (a *isoM2TSAdapter) DataSize() int64 {
	return a.parser.DataSize()
}

// FilteredVideoRanges returns the parser's filtered video ranges (zero-copy).
// FileOffset values are parser-relative. Use FileOffsetConverter to get
// ISO-relative offsets for range map encoding.
func (a *isoM2TSAdapter) FilteredVideoRanges() []PESPayloadRange {
	return a.parser.FilteredVideoRanges()
}

// FilteredAudioRanges returns the parser's filtered audio ranges (zero-copy).
// FileOffset values are parser-relative, like FilteredVideoRanges.
func (a *isoM2TSAdapter) FilteredAudioRanges(subStreamID byte) []PESPayloadRange {
	return a.parser.FilteredAudioRanges(subStreamID)
}

// ReadESData delegates to the parser; esOffset is an ES-domain offset.
func (a *isoM2TSAdapter) ReadESData(esOffset int64, size int, isVideo bool) ([]byte, error) {
	return a.parser.ReadESData(esOffset, size, isVideo)
}

// ReadAudioSubStreamData delegates to the parser for one audio sub-stream.
func (a *isoM2TSAdapter) ReadAudioSubStreamData(subStreamID byte, esOffset int64, size int) ([]byte, error) {
	return a.parser.ReadAudioSubStreamData(subStreamID, esOffset, size)
}
// --- ESReader interface (used by matcher/reconstruction) ---
// ESOffsetToFileOffset maps an ES offset to an ISO-relative file offset plus
// the number of payload bytes remaining in that range.
func (a *isoM2TSAdapter) ESOffsetToFileOffset(esOffset int64, isVideo bool) (fileOffset int64, remaining int) {
	off, rem := a.parser.ESOffsetToFileOffset(esOffset, isVideo)
	if a.mr == nil {
		// Contiguous: parser-relative + baseOffset = ISO-relative.
		return off + a.baseOffset, rem
	}
	// Multi-extent: the parser offset is assembled-relative; translate it
	// through the extent map for range maps / reconstruction.
	return a.logicalToISO(off), rem
}
// TotalESSize delegates to the wrapped parser's total ES size for the
// requested stream kind.
func (a *isoM2TSAdapter) TotalESSize(isVideo bool) int64 {
	return a.parser.TotalESSize(isVideo)
}
// AudioSubStreams delegates to the wrapped parser's sub-stream ID list.
func (a *isoM2TSAdapter) AudioSubStreams() []byte {
	return a.parser.AudioSubStreams()
}
// AudioSubStreamESSize delegates to the wrapped parser's per-sub-stream
// ES size.
func (a *isoM2TSAdapter) AudioSubStreamESSize(subStreamID byte) int64 {
	return a.parser.AudioSubStreamESSize(subStreamID)
}
// --- PESRangeProvider interface (used for range map creation) ---
// FilteredVideoRanges and FilteredAudioRanges already defined above.
// AudioSubStreams already defined above.
// --- FileOffsetAdjuster interface ---
// FileOffsetConverter returns a function that converts parser-relative
// FileOffset values to ISO-relative offsets for range map storage.
// Multi-extent files go through the extent map; contiguous files just add
// the clip's base offset.
func (a *isoM2TSAdapter) FileOffsetConverter() func(int64) int64 {
	if a.mr != nil {
		return a.logicalToISO
	}
	// Capture the base offset by value so the closure does not retain the
	// whole adapter.
	base := a.baseOffset
	return func(off int64) int64 { return base + off }
}
// --- hintedESReader interface (used by matcher expand) ---
// ReadESByteWithHint delegates single-byte ES reads (with a range hint for
// sequential access) to the wrapped parser.
func (a *isoM2TSAdapter) ReadESByteWithHint(esOffset int64, isVideo bool, rangeHint int) (byte, int, bool) {
	return a.parser.ReadESByteWithHint(esOffset, isVideo, rangeHint)
}
// ReadAudioByteWithHint delegates single-byte audio sub-stream reads (with a
// range hint) to the wrapped parser.
func (a *isoM2TSAdapter) ReadAudioByteWithHint(subStreamID byte, esOffset int64, rangeHint int) (byte, int, bool) {
	return a.parser.ReadAudioByteWithHint(subStreamID, esOffset, rangeHint)
}
// IsLPCMSubStream always returns false for Blu-ray M2TS (LPCM handling in
// this codebase is DVD-only), regardless of the sub-stream ID.
func (a *isoM2TSAdapter) IsLPCMSubStream(_ byte) bool {
	return false
}
// --- ESRangeConverter interface (for V3 format — adds baseOffset to raw ranges) ---
// RawRangesForESRegion fetches the parser's raw ranges for an ES region and
// rewrites their offsets to be ISO-relative (adding baseOffset, or mapping
// through the extent map for multi-extent files).
func (a *isoM2TSAdapter) RawRangesForESRegion(esOffset int64, size int, isVideo bool) ([]RawRange, error) {
	raw, err := a.parser.RawRangesForESRegion(esOffset, size, isVideo)
	if err != nil {
		return nil, err
	}
	return a.adjustRawRanges(raw), nil
}
// RawRangesForAudioSubStream fetches the parser's raw ranges for an audio
// sub-stream region and rewrites their offsets to be ISO-relative.
func (a *isoM2TSAdapter) RawRangesForAudioSubStream(subStreamID byte, esOffset int64, size int) ([]RawRange, error) {
	raw, err := a.parser.RawRangesForAudioSubStream(subStreamID, esOffset, size)
	if err != nil {
		return nil, err
	}
	return a.adjustRawRanges(raw), nil
}
// --- Internal helpers ---
// adjustRawRanges returns a copy of the raw ranges with ISO-relative
// offsets. Raw ranges are small (per-match, not per-packet), so copying
// is cheap.
func (a *isoM2TSAdapter) adjustRawRanges(ranges []RawRange) []RawRange {
	if a.mr != nil {
		// Multi-extent: assembled-relative offsets may straddle extent
		// boundaries, so map (and possibly split) them to ISO offsets.
		return a.mapRawRangesToISO(ranges)
	}
	// Contiguous: a constant shift by baseOffset is sufficient.
	out := make([]RawRange, 0, len(ranges))
	for _, r := range ranges {
		out = append(out, RawRange{
			FileOffset: a.baseOffset + r.FileOffset,
			Size:       r.Size,
		})
	}
	return out
}
// logicalToISO converts a logical offset within the assembled data to the
// corresponding physical ISO offset via the extent map. With an empty map
// the offset is returned unchanged.
func (a *isoM2TSAdapter) logicalToISO(logicalOff int64) int64 {
	if len(a.extentMap) == 0 {
		return logicalOff
	}
	// Binary search: first extent whose end lies beyond the offset.
	i := sort.Search(len(a.extentMap), func(k int) bool {
		e := a.extentMap[k]
		return e.LogicalStart+e.Length > logicalOff
	})
	if i == len(a.extentMap) {
		// Offset past the final extent — shouldn't happen; clamp to the
		// last extent as a fallback.
		i--
	}
	ext := a.extentMap[i]
	return ext.ISOOffset + (logicalOff - ext.LogicalStart)
}
// mapRawRangesToISO converts assembled-relative raw ranges to ISO-relative
// ranges. Because a single assembled range can cross an extent boundary,
// one input range may produce several output ranges.
func (a *isoM2TSAdapter) mapRawRangesToISO(ranges []RawRange) []RawRange {
	var out []RawRange
	for _, r := range ranges {
		logOff := r.FileOffset
		left := int64(r.Size)
		for left > 0 {
			// First extent whose end lies beyond the current offset.
			i := sort.Search(len(a.extentMap), func(k int) bool {
				e := a.extentMap[k]
				return e.LogicalStart+e.Length > logOff
			})
			if i == len(a.extentMap) {
				// Offset beyond the mapped extents — drop the remainder.
				break
			}
			ext := a.extentMap[i]
			inExtent := logOff - ext.LogicalStart
			// Take as much of this extent as the range still needs.
			chunk := ext.Length - inExtent
			if chunk > left {
				chunk = left
			}
			out = append(out, RawRange{
				FileOffset: ext.ISOOffset + inExtent,
				Size:       int(chunk),
			})
			logOff += chunk
			left -= chunk
		}
	}
	return out
}
package source
// DVD LPCM audio frame format (after 4-byte PS private stream header):
//
// Byte 0: emphasis(1) | mute(1) | reserved(1) | frame_number(5)
// Byte 1: quant_word_length(2) | sampling_freq(2) | reserved(1) | num_channels(3)
// Byte 2: dynamic_range_control
// Bytes 3+: PCM sample data (big-endian, grouped by bit depth)
//
// DVD stores big-endian samples with per-frame headers, while MKV stores
// A_PCM/INT/LIT (raw little-endian PCM, no framing). The transforms in this
// file convert between these two representations.
// LPCMHeaderSize is the size of the LPCM frame header after the 4-byte PS header.
const LPCMHeaderSize = 3

// LPCMTotalHeaderSize is the total header size to strip per PES payload
// (4-byte PS private-stream header + 3-byte LPCM frame header).
const LPCMTotalHeaderSize = 7

// lpcmIndexSyncInterval is the interval for source-side LPCM sync points.
// One sync point per PES payload range is sufficient when the MKV side uses
// a dense interval. Keeping the source interval large minimizes hash map memory.
const lpcmIndexSyncInterval = 2048

// lpcmMatchSyncInterval is the interval for MKV-side LPCM sync points.
// DVD LPCM PES payloads are typically ~2008 bytes while MKV packets are typically
// ~6400 bytes. Since gcd(2008, 6400) = 8, using an 8-byte interval guarantees
// at least one MKV sync point aligns with each source sync point. This is
// denser than other audio codecs but adds no memory (MKV sync points are lookups,
// not stored in the hash map), and LPCM is rare.
const lpcmMatchSyncInterval = 8
// LPCMFrameHeader represents a parsed DVD LPCM frame header (the 3 bytes
// that follow the 4-byte PS private-stream header).
type LPCMFrameHeader struct {
	Emphasis    bool // byte 0 bit 7
	Mute        bool // byte 0 bit 6
	FrameNumber byte // 5 bits (byte 0 low bits)
	Quantization byte // 2 bits: 0=16-bit, 1=20-bit, 2=24-bit
	SampleRate  byte // 2 bits: 0=48kHz, 1=96kHz
	Channels    byte // 3 bits: number of channels minus 1
}
// ParseLPCMFrameHeader parses a 3-byte DVD LPCM frame header. If fewer than
// LPCMHeaderSize bytes are supplied, the zero value is returned.
func ParseLPCMFrameHeader(data []byte) LPCMFrameHeader {
	var hdr LPCMFrameHeader
	if len(data) < LPCMHeaderSize {
		return hdr
	}
	b0, b1 := data[0], data[1]
	// Byte 0: emphasis(1) | mute(1) | reserved(1) | frame_number(5)
	hdr.Emphasis = b0&0x80 != 0
	hdr.Mute = b0&0x40 != 0
	hdr.FrameNumber = b0 & 0x1F
	// Byte 1: quant(2) | sample_rate(2) | reserved(1) | channels(3)
	hdr.Quantization = (b1 >> 6) & 0x03
	hdr.SampleRate = (b1 >> 4) & 0x03
	hdr.Channels = b1 & 0x07
	return hdr
}
// IsLPCM16Bit reports whether the quantization code denotes 16-bit LPCM
// (code 0). Only 16-bit LPCM is supported for matching and FUSE
// reconstruction: 20-bit (code 1) and 24-bit (code 2) use grouped big-endian
// packing that changes data size during transform, making in-place FUSE
// reconstruction infeasible without significant complexity.
func IsLPCM16Bit(quantization byte) bool {
	switch quantization {
	case 0:
		return true
	default:
		return false
	}
}
// TransformLPCM16BE byte-swaps 16-bit big-endian PCM samples in place,
// producing little-endian: each pair [HI][LO] becomes [LO][HI]. A trailing
// odd byte, if any, is left untouched.
func TransformLPCM16BE(data []byte) {
	for i := 0; i+1 < len(data); i += 2 {
		data[i], data[i+1] = data[i+1], data[i]
	}
}
// InverseTransformLPCM16 converts little-endian 16-bit PCM back to
// big-endian. A pairwise byte swap is its own inverse, so this performs the
// same swap as TransformLPCM16BE: each [LO][HI] pair becomes [HI][LO], and a
// trailing odd byte is left untouched.
func InverseTransformLPCM16(data []byte) {
	for i := 0; i+1 < len(data); i += 2 {
		data[i], data[i+1] = data[i+1], data[i]
	}
}
// FindLPCMIndexSyncPoints returns sync points for source-side LPCM indexing:
// offsets 0, lpcmIndexSyncInterval, 2*lpcmIndexSyncInterval, ... up to
// len(data). The large interval keeps the source hash map small. Returns nil
// for empty input.
func FindLPCMIndexSyncPoints(data []byte) []int {
	n := len(data)
	if n == 0 {
		return nil
	}
	// Pre-size: ceil(n / interval) offsets will be produced.
	offsets := make([]int, 0, (n+lpcmIndexSyncInterval-1)/lpcmIndexSyncInterval)
	for off := 0; off < n; off += lpcmIndexSyncInterval {
		offsets = append(offsets, off)
	}
	return offsets
}
// FindLPCMMatchSyncPoints returns sync points for MKV-side LPCM matching:
// offsets 0, lpcmMatchSyncInterval, 2*lpcmMatchSyncInterval, ... up to
// len(data). The dense 8-byte interval guarantees alignment with source sync
// points and adds no memory overhead (MKV sync points are used for hash
// lookups, not stored in the index). Returns nil for empty input.
func FindLPCMMatchSyncPoints(data []byte) []int {
	n := len(data)
	if n == 0 {
		return nil
	}
	// Pre-size: ceil(n / interval) offsets will be produced.
	offsets := make([]int, 0, (n+lpcmMatchSyncInterval-1)/lpcmMatchSyncInterval)
	for off := 0; off < n; off += lpcmMatchSyncInterval {
		offsets = append(offsets, off)
	}
	return offsets
}
// IsLPCMSubStreamID reports whether the sub-stream ID lies in the DVD LPCM
// range 0xA0-0xA7 (i.e. the top five bits are 10100).
func IsLPCMSubStreamID(subStreamID byte) bool {
	return subStreamID&0xF8 == 0xA0
}
package source
import (
"bytes"
"encoding/binary"
"fmt"
)
// MPEG-PS start codes. Each value is the full 32-bit pattern 00 00 01 xx;
// the scanner matches the 3-byte 00 00 01 prefix and ORs in the fourth byte.
const (
	PackStartCode      = 0x000001BA
	SystemHeaderCode   = 0x000001BB
	ProgramEndCode     = 0x000001B9
	PrivateStream1Code = 0x000001BD
	PrivateStream2Code = 0x000001BF
	PaddingStreamCode  = 0x000001BE
	VideoStreamMinCode = 0x000001E0 // video stream IDs span E0-EF
	VideoStreamMaxCode = 0x000001EF
	AudioStreamMinCode = 0x000001C0 // MPEG audio stream IDs span C0-DF
	AudioStreamMaxCode = 0x000001DF
)
// PESPacket represents a parsed PES packet from an MPEG-PS stream.
// Offsets are relative to the start of the parsed file data.
type PESPacket struct {
	StreamID      byte  // Stream identifier (E0-EF = video, C0-DF = audio, BD = private)
	SubStreamID   byte  // Sub-stream ID for Private Stream 1 (0x80-0x87 = AC3, 0x88-0x8F = DTS)
	Offset        int64 // Offset of the PES packet start in the file
	HeaderSize    int   // Total header size (start code + length + PES header + private header)
	PayloadOffset int64 // Offset of the actual audio/video payload
	PayloadSize   int   // Size of the payload
	IsVideo       bool  // True if this is a video stream
	IsAudio       bool  // True if this is an audio stream
}
// PESPayloadRange represents a contiguous range of elementary stream payload
// data: where the bytes live in the file and where they land in the logical
// (concatenated) elementary stream.
type PESPayloadRange struct {
	FileOffset int64 // Offset in the MPEG-PS file
	Size       int   // Size of this payload chunk
	ESOffset   int64 // Logical offset in the elementary stream
}
// MPEGPSParser parses MPEG Program Stream files to extract PES packet
// information. It never copies the input: all reads index directly into the
// mmap'd data slice.
type MPEGPSParser struct {
	data                []byte // Direct mmap'd data - zero-copy access
	size                int64  // len(data), cached as int64 for offset math
	packets             []PESPacket
	videoRanges         []PESPayloadRange
	audioRanges         []PESPayloadRange
	audioRangeStreamIDs []byte // PES stream ID for each audioRange (0xBD or 0xC0-0xDF)
	// Filtered ranges exclude user_data sections for MKV-compatible matching
	filteredVideoRanges []PESPayloadRange
	// Filtered audio ranges per sub-stream ID - separates interleaved audio tracks
	// Each sub-stream (0x80, 0x81, etc.) gets its own filtered range set
	filteredAudioBySubStream map[byte][]PESPayloadRange
	// audioSubStreams lists the sub-stream IDs in order of appearance
	audioSubStreams []byte
	// filterUserData is set to true once Parse has built the filtered ranges
	filterUserData bool
	// LPCM sub-stream tracking
	lpcmSubStreams map[byte]bool            // which sub-streams are 16-bit LPCM (byte-swap capable)
	lpcmInfo       map[byte]LPCMFrameHeader // parsed header per LPCM sub-stream
}
// NewMPEGPSParser creates a parser over memory-mapped file contents. The
// data slice should come from a zero-copy mmap (unix.Mmap); the parser never
// copies it. Call Parse or ParseWithProgress before using any accessors.
func NewMPEGPSParser(data []byte) *MPEGPSParser {
	p := &MPEGPSParser{data: data}
	p.size = int64(len(data))
	return p
}
// MPEGPSProgressFunc is called to report MPEG-PS parsing progress with the
// number of bytes processed so far and the total byte count.
type MPEGPSProgressFunc func(processed, total int64)
// Parse scans the file and extracts all PES packet information.
// Equivalent to ParseWithProgress(nil): no progress callbacks are made.
func (p *MPEGPSParser) Parse() error {
	return p.ParseWithProgress(nil)
}
// ParseWithProgress scans the file with progress reporting.
//
// The scan walks the data in ~4MB chunks looking for MPEG start codes
// (00 00 01 xx) and dispatches on the fourth byte: pack/system headers and
// padding are skipped by their declared length, video (E0-EF), MPEG audio
// (C0-DF) and Private Stream 1 (BD) PES packets are parsed and their payload
// ranges recorded, and any other pattern advances one byte. After the raw
// scan, filtered video/audio ranges are built for MKV-compatible matching.
//
// progress may be nil; when non-nil it is invoked roughly every 100MB and
// once more at completion with (processed, total) byte counts.
//
// Fix: the inner scan loop previously used `i < len(chunkData)-4`, which
// skipped the last valid scan position (len-4). Because the inter-chunk
// back-up below is only 3 bytes, a start code beginning exactly at chunk
// offset len-4 was scanned neither in this chunk nor in the next, silently
// dropping that packet. The loop condition is now inclusive.
func (p *MPEGPSParser) ParseWithProgress(progress MPEGPSProgressFunc) error {
	pos := int64(0)
	var videoESOffset, audioESOffset int64
	lastProgress := int64(0)

	// Pre-allocate slices to reduce reallocation churn.
	// Estimate: average PES packet ~2KB, so ~size/2048 packets,
	// split roughly 60% video, 40% audio.
	estimatedPackets := int(p.size / 2048)
	if estimatedPackets < 1000 {
		estimatedPackets = 1000
	}
	p.packets = make([]PESPacket, 0, estimatedPackets)
	p.videoRanges = make([]PESPayloadRange, 0, estimatedPackets*6/10)
	p.audioRanges = make([]PESPayloadRange, 0, estimatedPackets*4/10)
	p.audioRangeStreamIDs = make([]byte, 0, estimatedPackets*4/10)

	for pos < p.size-4 {
		// Direct slice access - zero copy
		end := pos + 4*1024*1024 // Process in ~4MB logical chunks for progress
		if end > p.size {
			end = p.size
		}
		chunkData := p.data[pos:end]
		if len(chunkData) < 4 {
			break
		}

		// Scan for start codes within this chunk. A start code occupies
		// 4 bytes, so the last scannable position is len-4 (inclusive —
		// see the fix note in the doc comment above).
		i := 0
		for i <= len(chunkData)-4 {
			// Fast scan for 00 00 01 prefix
			if chunkData[i] != 0 {
				i++
				continue
			}
			if chunkData[i+1] != 0 {
				i += 2
				continue
			}
			if chunkData[i+2] != 1 {
				i++
				continue
			}

			// Found potential start code at pos + i
			startCodePos := pos + int64(i)
			startCode := uint32(0x00000100) | uint32(chunkData[i+3])
			advance := int64(1) // default: resync one byte forward on unknown codes

			switch {
			case startCode == PackStartCode:
				packSize, err := p.parsePackHeader(startCodePos)
				if err == nil {
					advance = int64(packSize)
				}
			case startCode == SystemHeaderCode:
				headerLen, err := p.parseSystemHeader(startCodePos)
				if err == nil {
					advance = int64(headerLen)
				}
			case startCode == ProgramEndCode:
				// End of program stream - but DVDs can have multiple programs
				// (menu, main feature, extras, etc.), so continue parsing
				advance = 4
			case startCode == PaddingStreamCode:
				length, err := p.readPESLength(startCodePos + 4)
				if err == nil {
					advance = 6 + int64(length)
				}
			case startCode == PrivateStream1Code:
				pkt, err := p.parsePESPacket(startCodePos, byte(startCode&0xFF))
				if err == nil {
					pkt.IsAudio = true
					p.packets = append(p.packets, pkt)
					p.audioRanges = append(p.audioRanges, PESPayloadRange{
						FileOffset: pkt.PayloadOffset,
						Size:       pkt.PayloadSize,
						ESOffset:   audioESOffset,
					})
					p.audioRangeStreamIDs = append(p.audioRangeStreamIDs, 0xBD)
					audioESOffset += int64(pkt.PayloadSize)
					advance = int64(pkt.HeaderSize + pkt.PayloadSize)
				}
			case startCode >= VideoStreamMinCode && startCode <= VideoStreamMaxCode:
				pkt, err := p.parsePESPacket(startCodePos, byte(startCode&0xFF))
				if err == nil {
					pkt.IsVideo = true
					p.packets = append(p.packets, pkt)
					p.videoRanges = append(p.videoRanges, PESPayloadRange{
						FileOffset: pkt.PayloadOffset,
						Size:       pkt.PayloadSize,
						ESOffset:   videoESOffset,
					})
					videoESOffset += int64(pkt.PayloadSize)
					advance = int64(pkt.HeaderSize + pkt.PayloadSize)
				}
			case startCode >= AudioStreamMinCode && startCode <= AudioStreamMaxCode:
				pkt, err := p.parsePESPacket(startCodePos, byte(startCode&0xFF))
				if err == nil {
					pkt.IsAudio = true
					p.packets = append(p.packets, pkt)
					p.audioRanges = append(p.audioRanges, PESPayloadRange{
						FileOffset: pkt.PayloadOffset,
						Size:       pkt.PayloadSize,
						ESOffset:   audioESOffset,
					})
					p.audioRangeStreamIDs = append(p.audioRangeStreamIDs, pkt.StreamID)
					audioESOffset += int64(pkt.PayloadSize)
					advance = int64(pkt.HeaderSize + pkt.PayloadSize)
				}
			}

			// Move forward by the packet size (or 1 if unknown)
			newPos := startCodePos + advance
			i = int(newPos - pos)
		}

		// Move to the next chunk, backing up 3 bytes so a start code whose
		// first byte falls in the last 3 positions is caught next chunk.
		pos += int64(len(chunkData)) - 3
		if pos < 0 {
			pos = 0
		}

		// Report progress
		if progress != nil && pos-lastProgress > 100*1024*1024 { // Every 100MB
			progress(pos, p.size)
			lastProgress = pos
		}
	}
	if progress != nil {
		progress(p.size, p.size)
	}

	// Build filtered video ranges that exclude user_data (B2) sections.
	// This makes the ES compatible with what MKV tools produce.
	if err := p.buildFilteredVideoRanges(); err != nil {
		return fmt.Errorf("build filtered video ranges: %w", err)
	}
	// Build filtered audio ranges that strip Private Stream 1 headers
	// (sub-stream ID and 2-byte pointer, keeping frame count byte).
	if err := p.buildFilteredAudioRanges(); err != nil {
		return fmt.Errorf("build filtered audio ranges: %w", err)
	}
	p.filterUserData = true
	return nil
}
// buildFilteredVideoRanges scans the video ES and creates ranges that exclude
// user_data sections. User_data (00 00 01 B2) is used for closed captions etc.
// and is stripped by MKV tools, so excluding it here makes the source ES
// byte-comparable with MKV-extracted video.
// Optimized to use bytes.IndexByte for fast scanning (uses SIMD on x86).
// Populates p.filteredVideoRanges with contiguous ESOffset values; always
// returns nil (the error return exists for interface symmetry with the
// audio builder).
func (p *MPEGPSParser) buildFilteredVideoRanges() error {
	if len(p.videoRanges) == 0 {
		return nil
	}
	// Process each raw video range individually.
	// This avoids complex chunk boundary handling.
	// Pre-allocate with similar capacity to reduce reallocation.
	filteredRanges := make([]PESPayloadRange, 0, len(p.videoRanges))
	var filteredESOffset int64
	for _, rawRange := range p.videoRanges {
		// Direct slice access - zero copy, no allocation.
		endOffset := rawRange.FileOffset + int64(rawRange.Size)
		if endOffset > p.size {
			// Range extends past the file (truncated/bogus) — skip it
			// rather than read out of bounds.
			continue
		}
		data := p.data[rawRange.FileOffset:endOffset]
		// Scan for user_data sections within this PES payload.
		// Use bytes.IndexByte to quickly find 0x01 bytes (SIMD optimized).
		i := 2 // Start at position 2 since we need at least 00 00 before 01
		rangeStart := 0
		for i < len(data)-1 {
			// Find next 0x01 byte
			idx := bytes.IndexByte(data[i:], 0x01)
			if idx < 0 {
				break
			}
			pos := i + idx
			// Check if this is a user_data start code (00 00 01 B2)
			if pos >= 2 && pos < len(data)-1 &&
				data[pos-1] == 0x00 && data[pos-2] == 0x00 && data[pos+1] == UserDataStartCode {
				// Found user_data - emit the kept range that precedes it
				startCodePos := pos - 2
				if startCodePos > rangeStart {
					filteredRanges = append(filteredRanges, PESPayloadRange{
						FileOffset: rawRange.FileOffset + int64(rangeStart),
						Size:       startCodePos - rangeStart,
						ESOffset:   filteredESOffset,
					})
					filteredESOffset += int64(startCodePos - rangeStart)
				}
				// Skip the user_data section: fast-scan forward to the next
				// start code (00 00 01 ..) and resume emitting from there.
				i = pos + 2
				for i < len(data)-1 {
					idx := bytes.IndexByte(data[i:], 0x01)
					if idx < 0 {
						i = len(data)
						break
					}
					nextPos := i + idx
					if nextPos >= 2 && data[nextPos-1] == 0x00 && data[nextPos-2] == 0x00 {
						// Found next start code — back up to its first 0x00
						i = nextPos - 2
						break
					}
					i = nextPos + 1
				}
				rangeStart = i
			} else {
				i = pos + 1
			}
		}
		// Emit remaining data in this PES payload
		if rangeStart < len(data) {
			filteredRanges = append(filteredRanges, PESPayloadRange{
				FileOffset: rawRange.FileOffset + int64(rangeStart),
				Size:       len(data) - rangeStart,
				ESOffset:   filteredESOffset,
			})
			filteredESOffset += int64(len(data) - rangeStart)
		}
	}
	p.filteredVideoRanges = filteredRanges
	return nil
}
// buildFilteredAudioRanges creates per-sub-stream filtered audio ranges.
//
// For Private Stream 1 (0xBD), DVD audio has this structure:
//
//	Byte 0:    sub-stream ID (0x80-0x87 = AC3, 0x88-0x8F = DTS, etc.)
//	Byte 1:    number of audio frames
//	Bytes 2-3: first access unit pointer (offset to first audio frame)
//	Bytes 4+:  audio data (for AC3/DTS)
//
// For LPCM sub-streams (0xA0-0xA7), there are 3 additional header bytes after
// the 4-byte PS header (emphasis/mute/frame_number, quant/samplerate/channels,
// DRC), so we strip 7 bytes total. The LPCM header is parsed once per
// sub-stream on its first packet.
//
// For MPEG-1 audio streams (0xC0-0xDF), the PES payload is raw MP2 frame data
// with no sub-stream header. The stream ID is used as a pseudo sub-stream ID.
//
// Each sub-stream ID gets its own separate filtered ES to avoid interleaving
// issues. Populates p.filteredAudioBySubStream, p.audioSubStreams,
// p.lpcmSubStreams and p.lpcmInfo; always returns nil.
func (p *MPEGPSParser) buildFilteredAudioRanges() error {
	if len(p.audioRanges) == 0 {
		return nil
	}
	// Map to track ranges per sub-stream
	rangesBySubStream := make(map[byte][]PESPayloadRange)
	esOffsetBySubStream := make(map[byte]int64)
	seenSubStreams := make(map[byte]bool)
	p.lpcmSubStreams = make(map[byte]bool)
	p.lpcmInfo = make(map[byte]LPCMFrameHeader)
	for i, rawRange := range p.audioRanges {
		if rawRange.FileOffset >= p.size {
			// Payload starts past end of file — ignore.
			continue
		}
		// audioRangeStreamIDs is appended in lockstep with audioRanges
		// during Parse, so index i is valid here.
		pesStreamID := p.audioRangeStreamIDs[i]
		// MPEG-1 audio streams (0xC0-0xDF): payload is raw MP2 data, no sub-stream header
		if pesStreamID >= 0xC0 && pesStreamID <= 0xDF {
			if rawRange.Size <= 0 {
				continue
			}
			// Use the PES stream ID as a pseudo sub-stream ID
			if !seenSubStreams[pesStreamID] {
				seenSubStreams[pesStreamID] = true
				p.audioSubStreams = append(p.audioSubStreams, pesStreamID)
			}
			esOffset := esOffsetBySubStream[pesStreamID]
			rangesBySubStream[pesStreamID] = append(rangesBySubStream[pesStreamID], PESPayloadRange{
				FileOffset: rawRange.FileOffset,
				Size:       rawRange.Size,
				ESOffset:   esOffset,
			})
			esOffsetBySubStream[pesStreamID] += int64(rawRange.Size)
			continue
		}
		// Private Stream 1 (0xBD): has sub-stream header
		if rawRange.Size < 4 {
			// Too small to hold even the 4-byte PS header.
			continue
		}
		subStreamID := p.data[rawRange.FileOffset]
		// Check if this is AC3, DTS, or LPCM
		isAC3 := subStreamID >= 0x80 && subStreamID <= 0x87
		isDTS := subStreamID >= 0x88 && subStreamID <= 0x8F
		isLPCM := subStreamID >= 0xA0 && subStreamID <= 0xA7
		if isAC3 || isDTS || isLPCM {
			// Track sub-stream order
			if !seenSubStreams[subStreamID] {
				seenSubStreams[subStreamID] = true
				p.audioSubStreams = append(p.audioSubStreams, subStreamID)
			}
			if isLPCM {
				// Strip 7 bytes: 4-byte PS header + 3-byte LPCM frame header
				if rawRange.Size > LPCMTotalHeaderSize {
					// Parse LPCM header on first packet to get bit depth
					if _, ok := p.lpcmInfo[subStreamID]; !ok {
						headerEnd := rawRange.FileOffset + 4 + LPCMHeaderSize
						if headerEnd > p.size {
							continue
						}
						headerData := p.data[rawRange.FileOffset+4 : headerEnd]
						info := ParseLPCMFrameHeader(headerData)
						p.lpcmInfo[subStreamID] = info
						// Only 16-bit LPCM is supported for byte-swap matching.
						// 20/24-bit uses grouped packing that changes data size
						// during transform, so it falls through to delta.
						if IsLPCM16Bit(info.Quantization) {
							p.lpcmSubStreams[subStreamID] = true
						}
					}
					esOffset := esOffsetBySubStream[subStreamID]
					rangesBySubStream[subStreamID] = append(rangesBySubStream[subStreamID], PESPayloadRange{
						FileOffset: rawRange.FileOffset + LPCMTotalHeaderSize,
						Size:       rawRange.Size - LPCMTotalHeaderSize,
						ESOffset:   esOffset,
					})
					esOffsetBySubStream[subStreamID] += int64(rawRange.Size - LPCMTotalHeaderSize)
				}
			} else {
				// Strip the entire 4-byte header, keep only raw audio data
				if rawRange.Size > 4 {
					esOffset := esOffsetBySubStream[subStreamID]
					rangesBySubStream[subStreamID] = append(rangesBySubStream[subStreamID], PESPayloadRange{
						FileOffset: rawRange.FileOffset + 4, // Skip header (1 + 1 + 2)
						Size:       rawRange.Size - 4,        // Rest is audio data
						ESOffset:   esOffset,
					})
					esOffsetBySubStream[subStreamID] += int64(rawRange.Size - 4)
				}
			}
		}
		// Skip unknown sub-stream types (like subtitles 0x20-0x3F)
	}
	p.filteredAudioBySubStream = rangesBySubStream
	return nil
}
// parsePackHeader parses a pack header at pos and returns its total size in
// bytes: 14 plus stuffing for MPEG-2, a fixed 12 for MPEG-1. Format:
// 00 00 01 BA + SCR (6 bytes) + mux_rate (3 bytes) + stuffing.
func (p *MPEGPSParser) parsePackHeader(pos int64) (int, error) {
	// Need the 14-byte MPEG-2 minimum available to inspect the header.
	if pos+14 > p.size {
		return 0, fmt.Errorf("failed to read pack header")
	}
	hdr := p.data[pos : pos+14]
	// Byte 4 top bits distinguish MPEG-2 ('01') from MPEG-1 ('0010').
	if hdr[4]&0xC0 != 0x40 {
		// MPEG-1 pack header is a fixed 12 bytes.
		return 12, nil
	}
	// MPEG-2: 14 fixed bytes plus 0-7 stuffing bytes (count in the low
	// 3 bits of byte 13).
	return 14 + int(hdr[13]&0x07), nil
}
// parseSystemHeader parses a system header at pos and returns its total size:
// 6 bytes (start code + length field) plus the declared header length.
func (p *MPEGPSParser) parseSystemHeader(pos int64) (int, error) {
	// The 2-byte length field immediately follows the 4-byte start code.
	length, err := p.readPESLength(pos + 4)
	if err != nil {
		return 0, err
	}
	return 6 + int(length), nil
}
// readPESLength reads the big-endian 2-byte PES packet length field at pos.
// Returns an error if fewer than 2 bytes remain in the file.
func (p *MPEGPSParser) readPESLength(pos int64) (uint16, error) {
	if pos+2 > p.size {
		return 0, fmt.Errorf("failed to read PES length")
	}
	return binary.BigEndian.Uint16(p.data[pos : pos+2]), nil
}
// parsePESPacket parses a PES packet header at pos and returns packet info
// (header size, payload offset/size). Handles both MPEG-2 and MPEG-1 PES
// header layouts; a negative computed payload size is clamped to 0.
func (p *MPEGPSParser) parsePESPacket(pos int64, streamID byte) (PESPacket, error) {
	pkt := PESPacket{
		StreamID: streamID,
		Offset:   pos,
	}
	// Read length field
	length, err := p.readPESLength(pos + 4)
	if err != nil {
		return pkt, err
	}
	// PES packet structure after start code + stream ID + length:
	// - 2 bits: '10'
	// - 2 bits: PES_scrambling_control
	// - 1 bit: PES_priority
	// - 1 bit: data_alignment_indicator
	// - 1 bit: copyright
	// - 1 bit: original_or_copy
	// - 2 bits: PTS_DTS_flags
	// - 1 bit: ESCR_flag
	// - 1 bit: ES_rate_flag
	// - 1 bit: DSM_trick_mode_flag
	// - 1 bit: additional_copy_info_flag
	// - 1 bit: PES_CRC_flag
	// - 1 bit: PES_extension_flag
	// - 8 bits: PES_header_data_length
	// Then optional fields based on flags
	// Direct slice access for PES header fields
	if pos+9 > p.size {
		return pkt, fmt.Errorf("failed to read PES header")
	}
	buf := p.data[pos+6 : pos+9]
	// Check for MPEG-2 PES (starts with 10)
	if buf[0]&0xC0 == 0x80 {
		// MPEG-2 PES header: header_data_length (byte 8) says how many
		// optional-field bytes follow the fixed 3 flag/length bytes.
		headerDataLen := int(buf[2])
		pkt.HeaderSize = 6 + 3 + headerDataLen // start code(4) + length(2) + flags(2) + header_len(1) + header_data
		pkt.PayloadOffset = pos + int64(pkt.HeaderSize)
		// The declared PES length counts everything after the length field,
		// so subtract the 3 fixed bytes plus the optional header data.
		pkt.PayloadSize = int(length) - 3 - headerDataLen
	} else {
		// MPEG-1 PES header - simpler structure.
		// Skip stuffing bytes (0xFF), then decode the optional STD-buffer
		// and PTS/DTS fields to find where the payload begins.
		headerLen := 0
		offset := pos + 6
		for {
			if offset+int64(headerLen) >= p.size {
				return pkt, fmt.Errorf("failed to read PES header: offset out of range")
			}
			b := p.data[offset+int64(headerLen)]
			if b == 0xFF {
				headerLen++
				if headerLen > 16 { // Safety limit on stuffing run length
					break
				}
				continue
			}
			if b&0xC0 == 0x40 {
				// STD buffer field: 2 bytes, may be followed by timestamps
				headerLen += 2
				continue
			}
			if b&0xF0 == 0x20 {
				// PTS only: 5 bytes
				headerLen += 5
			} else if b&0xF0 == 0x30 {
				// PTS + DTS: 10 bytes
				headerLen += 10
			} else if b == 0x0F {
				// No timestamps: single marker byte
				headerLen++
			}
			// NOTE(review): a byte matching none of the patterns above falls
			// through with headerLen unchanged (treated as start of payload)
			// — presumably intentional tolerance for malformed headers.
			break
		}
		pkt.HeaderSize = 6 + headerLen
		pkt.PayloadOffset = pos + int64(pkt.HeaderSize)
		pkt.PayloadSize = int(length) - headerLen
	}
	if pkt.PayloadSize < 0 {
		// Malformed length field — clamp rather than produce a negative size.
		pkt.PayloadSize = 0
	}
	return pkt, nil
}
// VideoRanges returns all raw (unfiltered) video payload ranges found in the
// stream, in file order with cumulative ESOffset values.
func (p *MPEGPSParser) VideoRanges() []PESPayloadRange {
	return p.videoRanges
}
// FilteredVideoRangesCount returns the number of filtered video ranges
// (zero before Parse has run).
func (p *MPEGPSParser) FilteredVideoRangesCount() int {
	return len(p.filteredVideoRanges)
}
// RawVideoESSize returns the total size of the raw (unfiltered) video ES:
// the last range's ES offset plus its size. Returns 0 when no video ranges
// were found.
func (p *MPEGPSParser) RawVideoESSize() int64 {
	n := len(p.videoRanges)
	if n == 0 {
		return 0
	}
	// Ranges carry cumulative ES offsets, so the last one determines the total.
	last := p.videoRanges[n-1]
	return last.ESOffset + int64(last.Size)
}
// AudioRanges returns all raw (unfiltered) audio payload ranges found in the
// stream, in file order; sub-streams are still interleaved here.
func (p *MPEGPSParser) AudioRanges() []PESPayloadRange {
	return p.audioRanges
}
// Packets returns all parsed PES packets in file order.
func (p *MPEGPSParser) Packets() []PESPacket {
	return p.packets
}
// FileOffsetToESOffset converts a file offset within a payload to an ES
// offset by linear search over the raw (unfiltered) ranges. Returns -1 if
// the offset is not inside any known payload range.
func (p *MPEGPSParser) FileOffsetToESOffset(fileOffset int64, isVideo bool) int64 {
	var ranges []PESPayloadRange
	if isVideo {
		ranges = p.videoRanges
	} else {
		ranges = p.audioRanges
	}
	for _, r := range ranges {
		delta := fileOffset - r.FileOffset
		if delta >= 0 && delta < int64(r.Size) {
			return r.ESOffset + delta
		}
	}
	return -1
}
// ESOffsetToFileOffset converts an ES offset to a file offset by linear
// search over the raw (unfiltered) ranges. Returns the file offset and the
// number of payload bytes remaining in that range, or (-1, 0) if not found.
func (p *MPEGPSParser) ESOffsetToFileOffset(esOffset int64, isVideo bool) (fileOffset int64, remaining int) {
	var ranges []PESPayloadRange
	if isVideo {
		ranges = p.videoRanges
	} else {
		ranges = p.audioRanges
	}
	for _, r := range ranges {
		delta := esOffset - r.ESOffset
		if delta >= 0 && delta < int64(r.Size) {
			return r.FileOffset + delta, r.Size - int(delta)
		}
	}
	return -1, 0
}
// TotalESSize returns the total size of the elementary stream. For video it
// returns the filtered ES size when filtering has produced ranges, otherwise
// the raw size. For audio it returns 0 — use AudioSubStreamESSize instead.
func (p *MPEGPSParser) TotalESSize(isVideo bool) int64 {
	if !isVideo {
		// Audio is tracked per sub-stream.
		return 0
	}
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	return totalESSizeFromRanges(ranges)
}
// AudioSubStreams returns the list of audio sub-stream IDs in order of
// first appearance in the stream.
func (p *MPEGPSParser) AudioSubStreams() []byte {
	return p.audioSubStreams
}
// AudioSubStreamCount returns the number of distinct audio sub-streams seen.
func (p *MPEGPSParser) AudioSubStreamCount() int {
	return len(p.audioSubStreams)
}
// AudioSubStreamESSize returns the total filtered ES size for a specific
// audio sub-stream (0 for unknown sub-streams — the map lookup yields nil).
func (p *MPEGPSParser) AudioSubStreamESSize(subStreamID byte) int64 {
	return totalESSizeFromRanges(p.filteredAudioBySubStream[subStreamID])
}
// FilteredVideoRanges returns the filtered video payload ranges for
// zero-copy iteration, falling back to the raw ranges when filtering is
// disabled or produced nothing.
func (p *MPEGPSParser) FilteredVideoRanges() []PESPayloadRange {
	if !p.filterUserData || len(p.filteredVideoRanges) == 0 {
		return p.videoRanges
	}
	return p.filteredVideoRanges
}
// FilteredAudioRanges returns the filtered audio payload ranges for a
// specific sub-stream. Returns nil if the sub-stream doesn't exist.
func (p *MPEGPSParser) FilteredAudioRanges(subStreamID byte) []PESPayloadRange {
	return p.filteredAudioBySubStream[subStreamID]
}
// Data returns the raw mmap'd file data for zero-copy access.
// Callers must not mutate the returned slice.
func (p *MPEGPSParser) Data() []byte {
	return p.data
}
// DataSlice returns a zero-copy sub-slice of the backing data at the given
// offset and size. Offsets are file-relative; no bounds checking beyond the
// slice expression itself.
func (p *MPEGPSParser) DataSlice(off int64, size int) []byte {
	return p.data[off : off+int64(size)]
}
// DataSize returns the total size of the backing data in bytes.
func (p *MPEGPSParser) DataSize() int64 {
	return p.size
}
// ReadESByteWithHint reads a single byte from the video ES, using a range
// hint to avoid a binary search during sequential reads. Returns the byte,
// the range index where it was found (pass back as the hint on the next
// call), and a success flag. Pass rangeHint=-1 to force a binary search.
// Audio always returns (0, -1, false) — it goes through the per-sub-stream
// reader instead.
func (p *MPEGPSParser) ReadESByteWithHint(esOffset int64, isVideo bool, rangeHint int) (byte, int, bool) {
	if !isVideo {
		return 0, -1, false
	}
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		// Prefer the user_data-stripped view when available.
		ranges = p.filteredVideoRanges
	}
	return readByteWithHint(p.data, nil, p.size, ranges, esOffset, rangeHint)
}
// ReadAudioByteWithHint reads a single byte from an audio sub-stream, using
// a range hint. For 16-bit LPCM sub-streams the even/odd byte positions are
// swapped so the big-endian source reads back as little-endian.
func (p *MPEGPSParser) ReadAudioByteWithHint(subStreamID byte, esOffset int64, rangeHint int) (byte, int, bool) {
	ranges := p.filteredAudioBySubStream[subStreamID]
	off := esOffset
	if p.lpcmSubStreams[subStreamID] {
		// Swap within each 2-byte pair: XOR the offset with 1.
		off ^= 1
	}
	return readByteWithHint(p.data, nil, p.size, ranges, off, rangeHint)
}
// Video start codes that should be stripped (everything else is kept).
const (
	UserDataStartCode = 0xB2 // user_data (closed captions etc.) — stripped by MKV tools
)
// RawRange represents a contiguous chunk of raw file data corresponding to
// part of an ES region. Used for converting ES offsets to raw file offsets.
type RawRange struct {
	FileOffset int64 // Offset in the raw file
	Size       int   // Size of this chunk in bytes
}
// RawRangesForESRegion returns the raw file ranges that contain the given ES
// region. Video only — audio must use RawRangesForAudioSubStream. When
// filtering has produced ranges, the filtered (user_data-stripped) view is
// used; otherwise the raw video ranges are.
func (p *MPEGPSParser) RawRangesForESRegion(esOffset int64, size int, isVideo bool) ([]RawRange, error) {
	if !isVideo {
		return nil, fmt.Errorf("audio uses per-sub-stream methods, use RawRangesForAudioSubStream")
	}
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	return rawRangesFromPESRanges(ranges, esOffset, size)
}
// RawRangesForAudioSubStream returns the raw file ranges covering audio data
// from a specific sub-stream's filtered ES. Errors if the sub-stream was
// never seen during parsing.
func (p *MPEGPSParser) RawRangesForAudioSubStream(subStreamID byte, esOffset int64, size int) ([]RawRange, error) {
	ranges, ok := p.filteredAudioBySubStream[subStreamID]
	if !ok {
		return nil, fmt.Errorf("audio sub-stream 0x%02X not found", subStreamID)
	}
	return rawRangesFromPESRanges(ranges, esOffset, size)
}
// ReadESData reads elementary stream data at the given ES offset. For video
// this returns FILTERED ES data (user_data sections excluded) when filtering
// produced ranges, else the raw view. Audio returns an error — use
// ReadAudioSubStreamData instead.
func (p *MPEGPSParser) ReadESData(esOffset int64, size int, isVideo bool) ([]byte, error) {
	if !isVideo {
		return nil, fmt.Errorf("audio uses per-sub-stream methods, use ReadAudioSubStreamData")
	}
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	return readFromRanges(p.data, nil, p.size, ranges, esOffset, size)
}
// ReadAudioSubStreamData reads audio data from a specific sub-stream.
// For LPCM sub-streams, the data is byte-swapped to match MKV little-endian format.
// Handles alignment: if esOffset is odd, reads from the pair-aligned offset,
// swaps, and returns only the requested portion.
//
// Parameters:
//   - subStreamID: DVD sub-stream ID (0xA0-0xA7 are LPCM).
//   - esOffset:    offset into the sub-stream's elementary stream.
//   - size:        number of bytes requested.
//
// Returns the requested bytes (a fresh copy for LPCM; possibly a zero-copy
// slice for other codecs) or an error if the sub-stream is unknown or the
// region cannot be read.
func (p *MPEGPSParser) ReadAudioSubStreamData(subStreamID byte, esOffset int64, size int) ([]byte, error) {
	ranges, ok := p.filteredAudioBySubStream[subStreamID]
	if !ok {
		return nil, fmt.Errorf("audio sub-stream 0x%02X not found", subStreamID)
	}
	// Non-LPCM audio needs no transformation — read straight from the ranges.
	if !p.lpcmSubStreams[subStreamID] {
		return readFromRanges(p.data, nil, p.size, ranges, esOffset, size)
	}
	// LPCM 16-bit forward transform (DVD big-endian → MKV little-endian).
	// Byte-swap pairs are aligned to the ES start (pairs at offsets 0-1, 2-3, ...).
	// If esOffset is odd, we must read one extra byte before to complete the pair.
	alignedOffset := esOffset
	trimFront := 0
	if esOffset%2 == 1 {
		alignedOffset = esOffset - 1
		trimFront = 1
	}
	alignedSize := size + trimFront
	// If alignedSize is odd, extend by 1 to complete the trailing pair
	// (if data is available).
	trimBack := 0
	if alignedSize%2 == 1 {
		alignedSize++
		trimBack = 1
	}
	data, err := readFromRanges(p.data, nil, p.size, ranges, alignedOffset, alignedSize)
	if err != nil {
		// If extending caused an out-of-range error, retry without the trailing extension
		if trimBack > 0 {
			alignedSize--
			trimBack = 0
			data, err = readFromRanges(p.data, nil, p.size, ranges, alignedOffset, alignedSize)
		}
		// Retry also failed (or we never retried) — propagate the error.
		if err != nil {
			return nil, err
		}
	}
	// readFromRanges may return a zero-copy mmap slice, so clone first
	// (TransformLPCM16BE mutates its argument in place).
	result := make([]byte, len(data))
	copy(result, data)
	TransformLPCM16BE(result)
	// Trim to the originally requested range
	start := trimFront
	end := start + size
	if end > len(result) {
		end = len(result)
	}
	return result[start:end], nil
}
// IsLPCMSubStream returns true if the given sub-stream ID is an LPCM sub-stream.
func (p *MPEGPSParser) IsLPCMSubStream(subStreamID byte) bool {
	isLPCM := p.lpcmSubStreams[subStreamID]
	return isLPCM
}
package source
import (
"fmt"
"io"
"os"
"strings"
)
// detectDVDCodecs extracts codec information from an already-indexed DVD source.
// The MPEG-PS parser has already identified video and audio streams during indexing.
func detectDVDCodecs(index *Index) (*SourceCodecs, error) {
	result := &SourceCodecs{}
	for _, reader := range index.ESReaders {
		parser, isPS := reader.(*MPEGPSParser)
		if !isPS {
			continue
		}
		// A non-empty video ES on a DVD means MPEG-2 video.
		if parser.TotalESSize(true) > 0 && !containsCodec(result.VideoCodecs, CodecMPEG2Video) {
			result.VideoCodecs = append(result.VideoCodecs, CodecMPEG2Video)
		}
		// Map each audio sub-stream ID (Private Stream 1 and MPEG-1 audio
		// ranges) to its codec family.
		for _, id := range parser.AudioSubStreams() {
			var codec CodecType
			switch {
			case id >= 0x80 && id <= 0x87:
				codec = CodecAC3Audio
			case id >= 0x88 && id <= 0x8F:
				codec = CodecDTSAudio
			case id >= 0xA0 && id <= 0xA7:
				codec = CodecLPCMAudio
			case id >= 0xC0 && id <= 0xDF:
				codec = CodecMPEGAudio
			default:
				continue
			}
			if !containsCodec(result.AudioCodecs, codec) {
				result.AudioCodecs = append(result.AudioCodecs, codec)
			}
		}
	}
	return result, nil
}
// detectDVDCodecsFromFile detects codecs from a DVD ISO by parsing VTS IFO
// metadata files. IFO files authoritatively declare every stream in each title
// set, unlike PES scanning which can miss audio streams that appear later in
// the VOB data. Falls back to PES scanning if IFO parsing fails.
func detectDVDCodecsFromFile(path string) (*SourceCodecs, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open ISO file: %w", err)
	}
	defer f.Close()
	// Prefer IFO metadata: ISO9660 layout first, then UDF. Any failure
	// falls through to the PES scan below.
	if ifos := findIFOsInISO(f); len(ifos) > 0 {
		if codecs, ifoErr := detectDVDCodecsFromIFOs(f, ifos); ifoErr == nil {
			return codecs, nil
		}
	}
	if ifos, udfErr := findIFOsInUDF(f); udfErr == nil && len(ifos) > 0 {
		if codecs, ifoErr := detectDVDCodecsFromIFOs(f, ifos); ifoErr == nil {
			return codecs, nil
		}
	}
	// Last resort: scan raw PES start codes in the VOB data.
	return detectDVDCodecsFromFilePES(f)
}
// detectDVDCodecsFromFilePES scans PES start codes in VOB data to detect codecs.
// This is the legacy approach, kept as a fallback for ISOs where IFO parsing fails.
func detectDVDCodecsFromFilePES(f *os.File) (*SourceCodecs, error) {
	vobs := findContentVOBs(f)
	if len(vobs) == 0 {
		// Filesystem navigation failed — scan from the start of the ISO.
		return scanDVDRegion(f, 0)
	}
	union := &SourceCodecs{}
	scanned := false
	var lastErr error
	for _, vob := range significantFiles(vobs) {
		codecs, err := scanDVDRegion(f, vob.Offset)
		if err != nil {
			lastErr = err
			continue
		}
		mergeSourceCodecs(union, codecs)
		scanned = true
	}
	if scanned {
		return union, nil
	}
	// Every VOB scan failed — retry once from the start of the ISO.
	fallback, err := scanDVDRegion(f, 0)
	if err == nil {
		return fallback, nil
	}
	if lastErr != nil {
		return nil, fmt.Errorf("failed to scan any DVD VOBs: %w", lastErr)
	}
	return nil, err
}
// scanDVDRegion reads 4MB from the given offset and scans for MPEG-PS codecs.
func scanDVDRegion(f *os.File, offset int64) (*SourceCodecs, error) {
	const scanSize = 4 * 1024 * 1024
	buf := make([]byte, scanSize)
	n, err := f.ReadAt(buf, offset)
	switch {
	case err == io.EOF || err == io.ErrUnexpectedEOF:
		// Short read near end of file — use what we got.
		buf = buf[:n]
	case err != nil:
		return nil, fmt.Errorf("read %s at offset %d: %w", f.Name(), offset, err)
	}
	if n == 0 {
		return nil, fmt.Errorf("no data at offset %d in %s", offset, f.Name())
	}
	return scanPESCodecs(buf)
}
// scanPESCodecs scans a byte buffer for MPEG-PS PES headers and extracts codec information.
func scanPESCodecs(buf []byte) (*SourceCodecs, error) {
	codecs := &SourceCodecs{}
	addAudio := func(ct CodecType) {
		if !containsCodec(codecs.AudioCodecs, ct) {
			codecs.AudioCodecs = append(codecs.AudioCodecs, ct)
		}
	}
	for i := 0; i+3 < len(buf); i++ {
		// PES start code prefix: 00 00 01, followed by the stream ID.
		if buf[i] != 0x00 || buf[i+1] != 0x00 || buf[i+2] != 0x01 {
			continue
		}
		id := buf[i+3]
		switch {
		case id >= 0xE0 && id <= 0xEF:
			// Video stream — DVD video is always MPEG-2.
			if !containsCodec(codecs.VideoCodecs, CodecMPEG2Video) {
				codecs.VideoCodecs = append(codecs.VideoCodecs, CodecMPEG2Video)
			}
		case id == 0xBD:
			// Private Stream 1 — the sub-stream ID just past the PES header
			// distinguishes AC3, DTS, and LPCM.
			if i+9 >= len(buf) {
				break
			}
			subOff := i + 9 + int(buf[i+8])
			if subOff >= len(buf) {
				break
			}
			switch sub := buf[subOff]; {
			case sub >= 0x80 && sub <= 0x87:
				addAudio(CodecAC3Audio)
			case sub >= 0x88 && sub <= 0x8F:
				addAudio(CodecDTSAudio)
			case sub >= 0xA0 && sub <= 0xA7:
				addAudio(CodecLPCMAudio)
			}
		case id >= 0xC0 && id <= 0xDF:
			// MPEG audio stream.
			addAudio(CodecMPEGAudio)
		}
	}
	if len(codecs.VideoCodecs) == 0 && len(codecs.AudioCodecs) == 0 {
		return nil, fmt.Errorf("no DVD codecs detected in scanned region")
	}
	return codecs, nil
}
// findContentVOBs navigates the ISO9660 filesystem to find all content VOBs
// (VTS_xx_1.VOB, the first part of each title set). Returns nil if navigation
// fails, signaling the caller to fall back to scanning from the ISO start.
// Uses readISOPVDRoot/readISODirectory/findISOEntry from iso.go.
func findContentVOBs(f *os.File) []isoFileExtent {
	rootLBA, rootLen, err := readISOPVDRoot(f)
	if err != nil {
		return nil
	}
	rootEntries, err := readISODirectory(f, rootLBA, rootLen)
	if err != nil {
		return nil
	}
	videoTS, err := findISOEntry(rootEntries, "VIDEO_TS")
	if err != nil {
		return nil
	}
	entries, err := readISODirectory(f, uint32(videoTS.Offset/isoSectorSize), uint32(videoTS.Size))
	if err != nil {
		return nil
	}
	// Keep only VTS_xx_1.VOB (12 characters, '1' at index 7): _0 is
	// navigation-only and _2+ are continuations sharing the same audio
	// layout, so the first content part of each title set suffices.
	var result []isoFileExtent
	for _, entry := range entries {
		if entry.IsDir || len(entry.Name) != 12 {
			continue
		}
		if !strings.HasPrefix(entry.Name, "VTS_") || !strings.HasSuffix(entry.Name, ".VOB") {
			continue
		}
		if entry.Name[7] == '1' {
			result = append(result, entry)
		}
	}
	return result
}
package source
import "fmt"
// MPEGTSParser parses MPEG Transport Stream (M2TS) files to extract elementary
// stream data. This is the Blu-ray equivalent of MPEGPSParser for DVDs.
//
// M2TS files use 192-byte packets: 4-byte timestamp + 188-byte TS packet.
// Each TS packet carries a fragment of a PES packet, identified by PID.
// PES packets span multiple TS packets and contain the actual codec data.
//
// The parser builds PES payload range tables that map ES offsets to raw file
// offsets, enabling the matcher to work with continuous ES data while the
// underlying file has TS headers interleaved.
type MPEGTSParser struct {
	data        []byte           // mmap'd file data (zero-copy); nil when using multiRegion
	multiRegion *multiRegionData // non-nil for multi-extent UDF files
	size        int64            // total logical size of the backing data in bytes
	packetSize  int              // 192 (M2TS) or 188 (standard TS)
	tsOffset    int              // offset from packet start to TS sync byte (4 for M2TS, 0 for TS)
	// Stream PIDs from PMT
	videoPID   uint16
	audioPIDs  []uint16  // ordered by PMT appearance
	videoCodec CodecType // for user_data filtering decision
	// PES payload ranges (one entry per TS payload chunk for tracked PIDs)
	videoRanges         []PESPayloadRange
	filteredVideoRanges []PESPayloadRange // excludes user_data for MPEG-2 only
	audioBySubStream    map[byte][]PESPayloadRange
	// Audio PID → sub-stream ID mapping
	audioSubStreams []byte             // sequential IDs: 0, 1, 2, ...
	pidToSubStream  map[uint16]byte    // PID → sub-stream ID
	subStreamToPID  map[byte]uint16    // sub-stream ID → PID
	subStreamCodec  map[byte]CodecType // codec type per sub-stream
	filterUserData  bool               // whether user_data filtering is enabled for video
}
// NewMPEGTSParser creates a parser for the given memory-mapped M2TS data.
func NewMPEGTSParser(data []byte) *MPEGTSParser {
	p := &MPEGTSParser{
		data: data,
		size: int64(len(data)),
	}
	p.audioBySubStream = make(map[byte][]PESPayloadRange)
	p.pidToSubStream = make(map[uint16]byte)
	p.subStreamToPID = make(map[byte]uint16)
	p.subStreamCodec = make(map[byte]CodecType)
	return p
}
// NewMPEGTSParserMultiRegion creates a parser for non-contiguous M2TS data
// from a multi-extent UDF file. The multiRegionData provides a virtual
// contiguous view over multiple mmap sub-slices.
func NewMPEGTSParserMultiRegion(mr *multiRegionData) *MPEGTSParser {
	p := &MPEGTSParser{
		multiRegion: mr,
		size:        mr.Len(),
	}
	p.audioBySubStream = make(map[byte][]PESPayloadRange)
	p.pidToSubStream = make(map[uint16]byte)
	p.subStreamToPID = make(map[byte]uint16)
	p.subStreamCodec = make(map[byte]CodecType)
	return p
}
// dataSlice returns a sub-slice of the parser's data source.
// Uses multiRegion when available, otherwise direct slice of p.data.
func (p *MPEGTSParser) dataSlice(off, end int64) []byte {
	if mr := p.multiRegion; mr != nil {
		return mr.Slice(off, end)
	}
	return p.data[off:end]
}
// MPEGTSProgressFunc is called to report MPEG-TS parsing progress.
// processed/total are presumably byte counts of input consumed vs. overall —
// confirm against the indexer's call sites.
type MPEGTSProgressFunc func(processed, total int64)
// --- ESReader interface implementation ---
// ReadESData reads elementary stream data at the given ES offset.
// For video, returns filtered ES data (user_data stripped) when filtering is
// enabled; otherwise the raw video ES. Audio must be read via
// ReadAudioSubStreamData; requesting audio here returns an error.
func (p *MPEGTSParser) ReadESData(esOffset int64, size int, isVideo bool) ([]byte, error) {
	if !isVideo {
		return nil, fmt.Errorf("audio uses per-sub-stream methods, use ReadAudioSubStreamData")
	}
	// Consistent with TotalESSize, FilteredVideoRanges and MPEGPSParser:
	// only consult the filtered ranges when user_data filtering is actually
	// enabled, so the reported ES size and the data read here always agree.
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	return readFromRanges(p.data, p.multiRegion, p.size, ranges, esOffset, size)
}
// ESOffsetToFileOffset converts an ES offset to a file offset and remaining bytes.
// Returns the absolute raw-file offset for the given video ES offset plus the
// number of contiguous bytes left in that payload range. Returns (-1, 0) for
// audio requests (audio uses per-sub-stream tables) or when no range covers
// the offset.
func (p *MPEGTSParser) ESOffsetToFileOffset(esOffset int64, isVideo bool) (fileOffset int64, remaining int) {
	if !isVideo {
		// Audio offsets are resolved via the per-sub-stream range tables.
		return -1, 0
	}
	// Consistent with TotalESSize and MPEGPSParser: only consult the filtered
	// ranges when user_data filtering is actually enabled, so offsets computed
	// against the reported ES size resolve against the same range table.
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	idx := binarySearchRanges(ranges, esOffset)
	if idx < 0 {
		return -1, 0
	}
	r := ranges[idx]
	offsetInPayload := esOffset - r.ESOffset
	return r.FileOffset + offsetInPayload, r.Size - int(offsetInPayload)
}
// TotalESSize returns the total size of the elementary stream.
// Audio sizes are reported per sub-stream (AudioSubStreamESSize), so audio
// requests return 0.
func (p *MPEGTSParser) TotalESSize(isVideo bool) int64 {
	if !isVideo {
		return 0
	}
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	return totalESSizeFromRanges(ranges)
}
// AudioSubStreams returns the list of audio sub-stream IDs.
func (p *MPEGTSParser) AudioSubStreams() []byte {
	ids := p.audioSubStreams
	return ids
}
// SubtitleSubStreams returns the sub-stream IDs that carry subtitle data (e.g., PGS).
func (p *MPEGTSParser) SubtitleSubStreams() []byte {
	var subtitleIDs []byte
	for _, subID := range p.audioSubStreams {
		codec := p.subStreamCodec[subID]
		if IsSubtitleCodec(codec) {
			subtitleIDs = append(subtitleIDs, subID)
		}
	}
	return subtitleIDs
}
// AudioSubStreamESSize returns the ES size for a specific audio sub-stream.
func (p *MPEGTSParser) AudioSubStreamESSize(subStreamID byte) int64 {
	ranges := p.audioBySubStream[subStreamID]
	return totalESSizeFromRanges(ranges)
}
// ReadAudioSubStreamData reads audio data from a specific sub-stream.
func (p *MPEGTSParser) ReadAudioSubStreamData(subStreamID byte, esOffset int64, size int) ([]byte, error) {
	ranges, exists := p.audioBySubStream[subStreamID]
	if !exists {
		return nil, fmt.Errorf("audio sub-stream %d not found", subStreamID)
	}
	return readFromRanges(p.data, p.multiRegion, p.size, ranges, esOffset, size)
}
// --- ESRangeConverter interface implementation ---
// RawRangesForESRegion returns the raw file ranges for a video ES region.
// Audio regions must use RawRangesForAudioSubStream.
func (p *MPEGTSParser) RawRangesForESRegion(esOffset int64, size int, isVideo bool) ([]RawRange, error) {
	if !isVideo {
		return nil, fmt.Errorf("audio uses per-sub-stream methods, use RawRangesForAudioSubStream")
	}
	// Consistent with TotalESSize and MPEGPSParser: only consult the filtered
	// ranges when user_data filtering is actually enabled, so ES offsets and
	// the reported ES size always refer to the same range table.
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	return rawRangesFromPESRanges(ranges, esOffset, size)
}
// RawRangesForAudioSubStream returns the raw file ranges for audio data from a specific sub-stream.
func (p *MPEGTSParser) RawRangesForAudioSubStream(subStreamID byte, esOffset int64, size int) ([]RawRange, error) {
	ranges, exists := p.audioBySubStream[subStreamID]
	if !exists {
		return nil, fmt.Errorf("audio sub-stream %d not found", subStreamID)
	}
	return rawRangesFromPESRanges(ranges, esOffset, size)
}
// --- Hint-based reading for matcher hot path ---
// ReadESByteWithHint reads a single byte from the video ES with a range hint.
// Returns the byte, a hint value for the next call, and whether the read
// succeeded; audio requests always fail (use ReadAudioByteWithHint).
func (p *MPEGTSParser) ReadESByteWithHint(esOffset int64, isVideo bool, rangeHint int) (byte, int, bool) {
	if !isVideo {
		return 0, -1, false
	}
	// Consistent with TotalESSize and MPEGPSParser: only consult the filtered
	// ranges when user_data filtering is actually enabled, so hint-based reads
	// see the same ES view as size reporting.
	ranges := p.videoRanges
	if p.filterUserData && len(p.filteredVideoRanges) > 0 {
		ranges = p.filteredVideoRanges
	}
	return readByteWithHint(p.data, p.multiRegion, p.size, ranges, esOffset, rangeHint)
}
// ReadAudioByteWithHint reads a single byte from an audio sub-stream with a range hint.
func (p *MPEGTSParser) ReadAudioByteWithHint(subStreamID byte, esOffset int64, rangeHint int) (byte, int, bool) {
	ranges := p.audioBySubStream[subStreamID]
	return readByteWithHint(p.data, p.multiRegion, p.size, ranges, esOffset, rangeHint)
}
// IsLPCMSubStream always returns false for MPEG-TS (LPCM is DVD-only).
func (p *MPEGTSParser) IsLPCMSubStream(_ byte) bool {
	const lpcmOnBluray = false // LPCM byte-swapping only applies to DVD sources
	return lpcmOnBluray
}
// --- Accessors for indexer ---
// Data returns the raw mmap'd file data for zero-copy access.
// Returns nil when using multi-region data; use DataSlice instead.
func (p *MPEGTSParser) Data() []byte {
	raw := p.data
	return raw
}
// DataSlice returns a sub-slice of the backing data at the given offset and size.
// Works for both contiguous and multi-region data.
func (p *MPEGTSParser) DataSlice(off int64, size int) []byte {
	end := off + int64(size)
	if p.multiRegion != nil {
		return p.multiRegion.Slice(off, end)
	}
	return p.data[off:end]
}
// DataSize returns the total size of the backing data.
func (p *MPEGTSParser) DataSize() int64 {
	total := p.size
	return total
}
// FilteredVideoRanges returns the filtered video payload ranges.
// Falls back to the raw video ranges when filtering is disabled or produced
// no ranges.
func (p *MPEGTSParser) FilteredVideoRanges() []PESPayloadRange {
	if !p.filterUserData || len(p.filteredVideoRanges) == 0 {
		return p.videoRanges
	}
	return p.filteredVideoRanges
}
// FilteredAudioRanges returns the audio payload ranges for a specific sub-stream.
func (p *MPEGTSParser) FilteredAudioRanges(subStreamID byte) []PESPayloadRange {
	ranges := p.audioBySubStream[subStreamID]
	return ranges
}
// RawVideoESSize returns the total size of raw (unfiltered) video ES.
func (p *MPEGTSParser) RawVideoESSize() int64 {
	rawRanges := p.videoRanges
	return totalESSizeFromRanges(rawRanges)
}
// FilteredVideoRangesCount returns the number of filtered video ranges.
func (p *MPEGTSParser) FilteredVideoRangesCount() int {
	count := len(p.filteredVideoRanges)
	return count
}
// AudioSubStreamCount returns the number of audio sub-streams.
func (p *MPEGTSParser) AudioSubStreamCount() int {
	count := len(p.audioSubStreams)
	return count
}
// VideoPID returns the video PID detected from the PMT.
func (p *MPEGTSParser) VideoPID() uint16 {
	pid := p.videoPID
	return pid
}
// AudioPIDs returns the audio PIDs detected from the PMT.
func (p *MPEGTSParser) AudioPIDs() []uint16 {
	pids := p.audioPIDs
	return pids
}
// VideoCodec returns the video codec type detected from the PMT.
func (p *MPEGTSParser) VideoCodec() CodecType {
	codec := p.videoCodec
	return codec
}
// Ensure MPEGTSParser implements the required interfaces at compile time.
// A build failure on these lines means a method of the interface is missing
// or has the wrong signature.
var (
	_ ESReader         = (*MPEGTSParser)(nil)
	_ ESRangeConverter = (*MPEGTSParser)(nil)
)
package source
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// detectBlurayCodecs scans PMTs from indexed M2TS files to detect codecs.
// This is a fallback for when the pre-index DetectSourceCodecsFromDir check
// was skipped (e.g., detection failure).
func detectBlurayCodecs(index *Index) (*SourceCodecs, error) {
	if len(index.Files) == 0 {
		return nil, fmt.Errorf("no source files in index")
	}
	// ISO indexing can create several entries sharing one RelativePath;
	// scan each distinct file only once.
	visited := make(map[string]struct{}, len(index.Files))
	var targets []codecScanTarget
	for _, file := range index.Files {
		fullPath := filepath.Join(index.SourceDir, file.RelativePath)
		if _, dup := visited[fullPath]; dup {
			continue
		}
		visited[fullPath] = struct{}{}
		targets = append(targets, codecScanTarget{Path: fullPath, Size: file.Size})
	}
	return detectBlurayCodecsMulti(significantTargets(targets))
}
// detectBlurayCodecsMulti scans multiple M2TS files or ISOs and unions their
// codec information. ISO files are handled correctly via detectBlurayCodecsFromFile
// which parses their internal M2TS structure. Returns an error if no file could
// be scanned.
func detectBlurayCodecsMulti(targets []codecScanTarget) (*SourceCodecs, error) {
	if len(targets) == 0 {
		return nil, fmt.Errorf("no Blu-ray media files to scan")
	}
	union := &SourceCodecs{}
	scanned := false
	var lastErr error
	for _, target := range targets {
		codecs, err := detectBlurayCodecsFromFile(target.Path)
		if err != nil {
			lastErr = err
			continue
		}
		mergeSourceCodecs(union, codecs)
		scanned = true
	}
	if scanned {
		return union, nil
	}
	if lastErr != nil {
		return nil, fmt.Errorf("failed to scan any Blu-ray codecs: %w", lastErr)
	}
	return nil, fmt.Errorf("failed to scan any Blu-ray codecs")
}
// detectBlurayCodecsFromFile detects codecs from a single M2TS file or a
// Blu-ray ISO. For ISOs, it first tries parsing CLPI metadata files which
// authoritatively declare all streams, falling back to PMT scanning.
func detectBlurayCodecsFromFile(path string) (*SourceCodecs, error) {
	lower := strings.ToLower(path)
	if strings.HasSuffix(lower, ".iso") {
		return detectBlurayCodecsFromISO(path)
	}
	return scanM2TSCodecs(path, 0)
}
// detectBlurayCodecsFromISO detects codecs from a Blu-ray ISO. Tries CLPI
// metadata first (fast, authoritative), falls back to PMT scanning from M2TS data.
func detectBlurayCodecsFromISO(path string) (*SourceCodecs, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open ISO: %w", err)
	}
	defer f.Close()
	// CLPI metadata first: ISO9660 layout, then UDF.
	if clpis, findErr := findCLPIsInISO(f); findErr == nil && len(clpis) > 0 {
		if codecs, clpiErr := detectBlurayCodecsFromCLPIs(f, clpis); clpiErr == nil {
			return codecs, nil
		}
	}
	if clpis, findErr := findCLPIsInUDF(f); findErr == nil && len(clpis) > 0 {
		if codecs, clpiErr := detectBlurayCodecsFromCLPIs(f, clpis); clpiErr == nil {
			return codecs, nil
		}
	}
	// Fallback: scan PMTs from the M2TS payload data.
	return detectBlurayCodecsFromISOPMT(path)
}
// detectBlurayCodecsFromISOPMT scans PMT data from M2TS files within a Blu-ray ISO.
// This is the legacy approach, kept as a fallback for ISOs where CLPI parsing fails.
func detectBlurayCodecsFromISOPMT(path string) (*SourceCodecs, error) {
	m2tsFiles, err := findBlurayM2TSInISO(path)
	if err != nil {
		return nil, fmt.Errorf("find M2TS in ISO: %w", err)
	}
	if len(m2tsFiles) == 0 {
		return nil, fmt.Errorf("no M2TS files found in Blu-ray ISO")
	}
	union := &SourceCodecs{}
	scanned := false
	var lastErr error
	for _, m2ts := range significantFiles(m2tsFiles) {
		codecs, scanErr := scanM2TSCodecs(path, m2ts.Offset)
		if scanErr != nil {
			lastErr = scanErr
			continue
		}
		mergeSourceCodecs(union, codecs)
		scanned = true
	}
	if scanned {
		return union, nil
	}
	if lastErr != nil {
		return nil, fmt.Errorf("failed to scan any M2TS in ISO: %w", lastErr)
	}
	return nil, fmt.Errorf("failed to scan any M2TS in ISO")
}
// scanM2TSCodecs reads 2MB of M2TS data at the given offset and parses the
// PAT/PMT to extract codec information from a single M2TS stream.
func scanM2TSCodecs(path string, readOffset int64) (*SourceCodecs, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open file: %w", err)
	}
	defer f.Close()
	const scanSize = 2 * 1024 * 1024
	buf := make([]byte, scanSize)
	n, err := f.ReadAt(buf, readOffset)
	// A short read at end of file is acceptable; anything else is fatal.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return nil, fmt.Errorf("read M2TS data from %s: %w", path, err)
	}
	if n == 0 {
		return nil, fmt.Errorf("no M2TS data at offset %d in %s", readOffset, path)
	}
	buf = buf[:n]
	// Need at least four packets to reliably detect TS framing.
	if len(buf) < 192*4 {
		return nil, fmt.Errorf("M2TS data too small to detect TS structure (%d bytes)", len(buf))
	}
	return parseTSCodecs(buf)
}
// parseTSCodecs scans MPEG-TS data to find the PAT and PMT and extract stream types.
// This uses reassemblePSISection to correctly handle PMTs that span multiple TS
// packets (common on Blu-rays with many audio and subtitle streams).
//
// Returns the codecs declared in the PMT, or an error if no valid TS framing,
// PAT, or PMT could be found in the buffer.
func parseTSCodecs(data []byte) (*SourceCodecs, error) {
	// Detect TS packet size: 188 (standard) or 192 (M2TS with 4-byte timestamp)
	packetSize, startOffset := detectTSPacketSize(data)
	if packetSize == 0 {
		return nil, fmt.Errorf("cannot detect TS packet size")
	}
	tsOffset := 0
	if packetSize == 192 {
		tsOffset = 4 // skip the 4-byte M2TS timestamp to reach the TS sync byte
	}
	// Step 1: Find PAT (PID 0x0000) to get PMT PID
	patSection, err := reassemblePSISection(data, startOffset, packetSize, tsOffset, 0, 0x00)
	if err != nil {
		return nil, fmt.Errorf("find PAT: %w", err)
	}
	pmtPID := pmtPIDFromPAT(patSection)
	if pmtPID == 0 {
		return nil, fmt.Errorf("PMT PID not found in PAT")
	}
	// Step 2: Reassemble complete PMT section (may span multiple TS packets)
	pmtSection, err := reassemblePSISection(data, startOffset, packetSize, tsOffset, pmtPID, 0x02)
	if err != nil {
		return nil, fmt.Errorf("find PMT: %w", err)
	}
	// Step 3: Extract stream types from the reassembled PMT
	codecs := &SourceCodecs{}
	if len(pmtSection) >= 12 {
		// program_info_length: 12 bits spanning bytes 10-11 of the section.
		progInfoLen := int(pmtSection[10]&0x0F)<<8 | int(pmtSection[11])
		streamsStart := 12 + progInfoLen
		// section_length: 12 bits spanning bytes 1-2; the section occupies
		// 3+section_length bytes, the last 4 of which are the CRC32.
		sectionLen := int(pmtSection[1]&0x0F)<<8 | int(pmtSection[2])
		streamsEnd := 3 + sectionLen - 4 // exclude CRC32
		if streamsEnd > len(pmtSection) {
			streamsEnd = len(pmtSection)
		}
		// Each entry is 5 fixed bytes (stream_type, elementary_PID,
		// ES_info_length) followed by ES_info_length descriptor bytes.
		for j := streamsStart; j+5 <= streamsEnd; {
			streamType := pmtSection[j]
			esInfoLen := int(pmtSection[j+3]&0x0F)<<8 | int(pmtSection[j+4])
			ct := tsStreamTypeToCodecType(streamType)
			if ct != CodecUnknown {
				if IsVideoCodec(ct) {
					if !containsCodec(codecs.VideoCodecs, ct) {
						codecs.VideoCodecs = append(codecs.VideoCodecs, ct)
					}
				} else if IsAudioCodec(ct) {
					if !containsCodec(codecs.AudioCodecs, ct) {
						codecs.AudioCodecs = append(codecs.AudioCodecs, ct)
					}
				} else if IsSubtitleCodec(ct) {
					if !containsCodec(codecs.SubtitleCodecs, ct) {
						codecs.SubtitleCodecs = append(codecs.SubtitleCodecs, ct)
					}
				}
			}
			// Advance past this entry; stop on overflow or truncated data.
			next := j + 5 + esInfoLen
			if next < j || next > streamsEnd {
				break
			}
			j = next
		}
	}
	return codecs, nil
}
// tsStreamTypeToCodecType maps MPEG-TS stream type values to CodecType.
// Unrecognized stream types map to CodecUnknown.
func tsStreamTypeToCodecType(streamType byte) CodecType {
	switch streamType {
	// Video stream types
	case 0x01:
		return CodecMPEG1Video
	case 0x02:
		return CodecMPEG2Video
	case 0x1B:
		return CodecH264Video
	case 0x24:
		return CodecH265Video
	case 0xEA:
		return CodecVC1Video
	// Audio stream types
	case 0x03, 0x04:
		return CodecMPEGAudio
	case 0x0F:
		return CodecAACaudio
	case 0x80:
		return CodecLPCMAudio
	case 0x81:
		return CodecAC3Audio
	case 0x82:
		return CodecDTSAudio
	case 0x83:
		return CodecTrueHDAudio
	case 0x84:
		return CodecEAC3Audio
	case 0x85, 0x86:
		return CodecDTSHDAudio
	// Subtitle stream types
	case 0x90:
		return CodecPGSSubtitle
	}
	return CodecUnknown
}
// detectTSPacketSize determines TS packet size (188 or 192) and the offset to
// the first sync byte. Returns (0, 0) if no valid TS structure is found.
func detectTSPacketSize(data []byte) (int, int) {
	// Try M2TS framing (192-byte packets with a 4-byte timestamp prefix)
	// before standard TS (188-byte packets).
	for _, pktSize := range []int{192, 188} {
		prefix := 0
		if pktSize == 192 {
			prefix = 4 // M2TS timestamp precedes the sync byte
		}
		for start := 0; start < pktSize && start+pktSize*3 < len(data); start++ {
			// Require the 0x47 sync byte at this packet and the next three.
			aligned := true
			for k := 0; k <= 3; k++ {
				pos := start + k*pktSize + prefix
				if pos >= len(data) || data[pos] != 0x47 {
					aligned = false
					break
				}
			}
			if aligned {
				return pktSize, start
			}
		}
	}
	return 0, 0
}
package source
import "log"
// splitDTSHDCoreStreams detects DTS-HD audio streams that contain an embedded
// DTS core and extracts the core into a separate sub-stream. On Blu-ray,
// DTS-HD streams (PMT types 0x85/0x86) embed DTS core frames followed by
// extension data (ExSS: XBR, XLL, XXCh) in the same PID. Video extraction tools may
// extract either the full DTS-HD stream (A_DTS/LOSSLESS) or just the DTS core
// (A_DTS).
//
// Unlike TrueHD+AC3 where the original is replaced, here we keep the original
// combined sub-stream (for A_DTS/LOSSLESS matching) and add a new core-only
// sub-stream (for A_DTS matching).
func (p *MPEGTSParser) splitDTSHDCoreStreams() {
	for _, subID := range p.audioSubStreams {
		if p.subStreamCodec[subID] != CodecDTSHDAudio {
			continue
		}
		combined := p.audioBySubStream[subID]
		if len(combined) == 0 {
			continue
		}
		// Only split streams that really carry both core and ExSS frames.
		if !p.detectCombinedDTSHDCore(combined) {
			continue
		}
		coreRanges := p.splitDTSHDCoreRanges(combined)
		if len(coreRanges) == 0 {
			continue
		}
		coreRanges = mergeAdjacentRanges(coreRanges)
		// Register the core as an additional sub-stream for A_DTS matching;
		// the combined stream stays for A_DTS/LOSSLESS matching.
		coreID := byte(len(p.audioSubStreams))
		p.audioBySubStream[coreID] = coreRanges
		p.subStreamCodec[coreID] = CodecDTSAudio
		p.audioSubStreams = append(p.audioSubStreams, coreID)
	}
}
// detectCombinedDTSHDCore checks if a DTS-HD audio stream contains both
// DTS core frames and DTS-HD extension (ExSS) frames by scanning the first
// few KB of ES data for both sync patterns.
func (p *MPEGTSParser) detectCombinedDTSHDCore(ranges []PESPayloadRange) bool {
	const maxCheck = 16 * 1024
	var foundCore, foundExSS bool
	checked := 0
	for _, r := range ranges {
		if checked >= maxCheck {
			break
		}
		end := r.FileOffset + int64(r.Size)
		if end > p.size {
			continue
		}
		chunk := p.dataSlice(r.FileOffset, end)
		if budget := maxCheck - checked; budget < len(chunk) {
			chunk = chunk[:budget]
		}
		for i := 0; i+3 < len(chunk); i++ {
			b0, b1, b2, b3 := chunk[i], chunk[i+1], chunk[i+2], chunk[i+3]
			// DTS core sync: 7F FE 80 01
			if b0 == 0x7F && b1 == 0xFE && b2 == 0x80 && b3 == 0x01 {
				foundCore = true
			}
			// DTS-HD ExSS sync: 64 58 20 25
			if b0 == 0x64 && b1 == 0x58 && b2 == 0x20 && b3 == 0x25 {
				foundExSS = true
			}
			if foundCore && foundExSS {
				return true
			}
		}
		checked += len(chunk)
	}
	return false
}
// detectActualDTSCoreSize reads the beginning of a DTS-HD stream's ES data
// to determine the actual core frame size. In DTS-HD MA/HRA streams, the FSIZE
// field in the DTS core header reports the full access unit size (core + extension),
// not just the core portion. This function finds the real core boundary by
// scanning for the ExSS sync word (64 58 20 25) or the next DTS core sync word.
//
// Returns the actual core frame size in bytes, or 0 if it cannot be determined.
func (p *MPEGTSParser) detectActualDTSCoreSize(ranges []PESPayloadRange) int {
	// Read up to 32KB of ES data — enough for several frames at any bitrate.
	const maxRead = 32 * 1024
	buf := make([]byte, 0, maxRead)
	for _, r := range ranges {
		if len(buf) >= maxRead {
			break
		}
		endOffset := r.FileOffset + int64(r.Size)
		// Skip ranges that extend past the backing data.
		if endOffset > p.size {
			continue
		}
		data := p.dataSlice(r.FileOffset, endOffset)
		remaining := maxRead - len(buf)
		if len(data) > remaining {
			data = data[:remaining]
		}
		buf = append(buf, data...)
	}
	// Find all DTS core sync positions to measure frame boundaries.
	// Only positions whose header yields a valid frame size are kept,
	// filtering out sync-word look-alikes inside extension data.
	var syncPositions []int
	for i := 0; i+6 < len(buf); i++ {
		if buf[i] == 0x7F && buf[i+1] == 0xFE &&
			buf[i+2] == 0x80 && buf[i+3] == 0x01 {
			if DTSCoreFrameSize(buf[i:i+7]) > 0 {
				syncPositions = append(syncPositions, i)
			}
		}
	}
	if len(syncPositions) == 0 {
		return 0
	}
	dtsSyncPos := syncPositions[0]
	// Find actual core boundary from the first frame by scanning for ExSS
	// sync or next DTS sync.
	coreSize := 0
	for i := dtsSyncPos + 7; i+3 < len(buf); i++ {
		// ExSS sync: 64 58 20 25
		if buf[i] == 0x64 && buf[i+1] == 0x58 &&
			buf[i+2] == 0x20 && buf[i+3] == 0x25 {
			coreSize = i - dtsSyncPos
			break
		}
		// Next DTS core sync: 7F FE 80 01 (validated)
		if buf[i] == 0x7F && buf[i+1] == 0xFE &&
			buf[i+2] == 0x80 && buf[i+3] == 0x01 {
			if i+6 < len(buf) && DTSCoreFrameSize(buf[i:i+7]) > 0 {
				coreSize = i - dtsSyncPos
				break
			}
		}
	}
	if coreSize == 0 {
		// Could not find boundary — fall back to FSIZE from header.
		return DTSCoreFrameSize(buf[dtsSyncPos : dtsSyncPos+7])
	}
	// Validate that the detected core size is consistent across additional
	// frames. DTS core on Blu-ray uses CBR, so the core portion of each
	// access unit should be the same size. The DTS-HD extension data can
	// vary in size (making total access units differ), so we validate the
	// core boundary directly: at syncPos + coreSize we expect either an
	// ExSS sync word (64 58 20 25) or the next DTS core sync word.
	// The first frame's boundary is already validated (it produced coreSize),
	// so if the buffer is too short to check any additional frames we still
	// trust the measurement.
	for _, sp := range syncPositions[1:] {
		boundary := sp + coreSize
		if boundary+3 >= len(buf) {
			break
		}
		// ExSS sync at expected boundary — core size is correct (4 bytes needed)
		if buf[boundary] == 0x64 && buf[boundary+1] == 0x58 &&
			buf[boundary+2] == 0x20 && buf[boundary+3] == 0x25 {
			continue
		}
		// Next DTS core sync at boundary — no extension in this frame,
		// but core size still matches. Validate the header to avoid false
		// positives from extension data containing the sync word pattern.
		// Requires 7 bytes for DTSCoreFrameSize validation.
		if boundary+6 < len(buf) &&
			buf[boundary] == 0x7F && buf[boundary+1] == 0xFE &&
			buf[boundary+2] == 0x80 && buf[boundary+3] == 0x01 &&
			DTSCoreFrameSize(buf[boundary:boundary+7]) > 0 {
			continue
		}
		// Neither marker at expected boundary — core size may be wrong.
		// If there aren't enough bytes for DTS header validation, don't
		// treat it as a mismatch — just stop checking.
		if boundary+6 >= len(buf) {
			break
		}
		log.Printf("mpegts: warning: DTS core boundary mismatch at offset %d (expected ExSS or DTS sync at +%d); skipping core extraction", sp, coreSize)
		return 0
	}
	return coreSize
}
// splitDTSHDCoreRanges extracts DTS core frame ranges from a combined DTS-HD
// stream. It walks through PES payload ranges, identifies DTS core frames by
// their sync word, and collects only the core bytes (excluding DTS-HD extension
// data).
//
// In DTS-HD streams, the FSIZE header field reports the full access unit size
// (core + extension), not the core-only size. We detect the actual core size
// by scanning for the ExSS boundary in detectActualDTSCoreSize.
//
// Because a 7-byte core header can straddle a PES range boundary, the function
// buffers partial headers in headerBuf and tracks the covered input ranges in
// headerPendingRanges until enough bytes arrive to validate or reject the header.
func (p *MPEGTSParser) splitDTSHDCoreRanges(ranges []PESPayloadRange) []PESPayloadRange {
	// Detect actual core frame size by scanning the stream.
	actualCoreSize := p.detectActualDTSCoreSize(ranges)
	if actualCoreSize <= 0 {
		return nil
	}
	var coreRanges []PESPayloadRange
	var coreES int64   // cumulative ES offset for core output
	coreRemaining := 0 // bytes remaining in current DTS core frame
	// Buffer for DTS core header detection across range boundaries.
	// We need bytes 0-6: 4-byte sync word + 3 bytes for frame size field.
	var headerBuf [7]byte
	headerBufLen := 0
	var headerPendingRanges []PESPayloadRange
	for _, r := range ranges {
		endOffset := r.FileOffset + int64(r.Size)
		if endOffset > p.size {
			// Range extends past the file — skip it (malformed input).
			continue
		}
		data := p.dataSlice(r.FileOffset, endOffset)
		pos := 0
		// Handle header bytes buffered from previous range
		if headerBufLen > 0 && coreRemaining == 0 {
			need := 7 - headerBufLen
			if need > len(data) {
				// This range doesn't have enough bytes to complete the
				// 7-byte header. Buffer these bytes and continue accumulating
				// across subsequent ranges until we have a full 7-byte header.
				copy(headerBuf[headerBufLen:], data)
				headerBufLen += len(data)
				headerPendingRanges = append(headerPendingRanges, r)
				// Move to the next range; do not rescan these bytes individually.
				continue
			} else {
				copy(headerBuf[headerBufLen:], data[:need])
				if DTSCoreFrameSize(headerBuf[:7]) > 0 {
					// Valid DTS core frame spanning range boundary.
					// Add any intermediate pending ranges to core.
					for _, pr := range headerPendingRanges {
						coreRanges = append(coreRanges, PESPayloadRange{
							FileOffset: pr.FileOffset,
							Size:       pr.Size,
							ESOffset:   coreES,
						})
						coreES += int64(pr.Size)
					}
					headerPendingRanges = nil
					coreRanges = append(coreRanges, PESPayloadRange{
						FileOffset: r.FileOffset,
						Size:       need,
						ESOffset:   coreES,
					})
					coreES += int64(need)
					// Use detected core size, not FSIZE. Subtract the 7 header
					// bytes already consumed (from buffer + current range).
					coreRemaining = actualCoreSize - 7
					pos = need
					headerBufLen = 0
					// Jump straight into the consumption loop; the header has
					// already been accounted for above.
					goto scanLoop
				}
				// Not a valid DTS core header — discard buffered bytes (they're extension data).
				// Re-attribute the optimistic core range back (remove it).
				// The optimistic range is the trailing-tail entry appended at
				// the end of the previous range's processing.
				if len(coreRanges) > 0 {
					last := coreRanges[len(coreRanges)-1]
					coreRanges = coreRanges[:len(coreRanges)-1]
					coreES -= int64(last.Size)
				}
				headerPendingRanges = nil
				headerBufLen = 0
			}
		}
	scanLoop:
		for pos < len(data) {
			if coreRemaining > 0 {
				// Inside a DTS core frame — consume bytes
				consume := coreRemaining
				if consume > len(data)-pos {
					consume = len(data) - pos
				}
				coreRanges = append(coreRanges, PESPayloadRange{
					FileOffset: r.FileOffset + int64(pos),
					Size:       consume,
					ESOffset:   coreES,
				})
				coreES += int64(consume)
				coreRemaining -= consume
				pos += consume
				continue
			}
			// Look for DTS core sync word (need 7 bytes: 4-byte sync + 3 for frame size)
			if pos+6 < len(data) &&
				data[pos] == 0x7F && data[pos+1] == 0xFE &&
				data[pos+2] == 0x80 && data[pos+3] == 0x01 {
				if DTSCoreFrameSize(data[pos:pos+7]) > 0 {
					coreRemaining = actualCoreSize
					continue // will be consumed in coreRemaining branch
				}
			}
			// Not DTS core data (extension or other) — skip forward to next
			// potential DTS core sync word or end of range
			pos++
			for pos < len(data) {
				if pos+6 < len(data) &&
					data[pos] == 0x7F && data[pos+1] == 0xFE &&
					data[pos+2] == 0x80 && data[pos+3] == 0x01 {
					if DTSCoreFrameSize(data[pos:pos+7]) > 0 {
						break
					}
				}
				pos++
			}
		}
		// After processing, check if trailing bytes could be a partial DTS core header
		if coreRemaining == 0 && len(data) > 0 {
			// Look for 0x7F (start of DTS sync word) near end of range.
			// We need up to 7 bytes (4-byte sync + 3 bytes) for DTSCoreFrameSize(),
			// so search the last 6 bytes in case the sync word starts at len(data)-6
			// or len(data)-5 and continues into the next range.
			checkStart := len(data) - 6
			if checkStart < 0 {
				checkStart = 0
			}
			bufStart := -1
			for j := len(data) - 1; j >= checkStart; j-- {
				if data[j] == 0x7F {
					bufStart = j
					break
				}
			}
			if bufStart >= 0 {
				tailLen := len(data) - bufStart
				copy(headerBuf[:], data[bufStart:])
				headerBufLen = tailLen
				// Add trimmed bytes to core optimistically; if the next range
				// proves this is not a header, the entry is removed again above.
				coreRanges = append(coreRanges, PESPayloadRange{
					FileOffset: r.FileOffset + int64(bufStart),
					Size:       tailLen,
					ESOffset:   coreES,
				})
				coreES += int64(tailLen)
			}
		}
	}
	// If we ended with buffered bytes, they weren't a valid DTS core header — remove
	if headerBufLen > 0 {
		if len(coreRanges) > 0 {
			last := coreRanges[len(coreRanges)-1]
			coreRanges = coreRanges[:len(coreRanges)-1]
			coreES -= int64(last.Size)
		}
	}
	return coreRanges
}
package source
import (
"bytes"
"fmt"
"log"
)
// Parse scans the file and extracts all PES payload ranges.
// It is shorthand for ParseWithProgress with no progress callback.
func (p *MPEGTSParser) Parse() error {
	var noProgress MPEGTSProgressFunc
	return p.ParseWithProgress(noProgress)
}
// ParseWithProgress scans the M2TS file with progress reporting.
func (p *MPEGTSParser) ParseWithProgress(progress MPEGTSProgressFunc) error {
	// Multi-extent sources take the region-aware path.
	if p.multiRegion != nil {
		return p.parseMultiRegion(progress)
	}
	// Step 1: determine the TS packet size (188 plain TS / 192 M2TS)
	// from a small prefix of the file.
	probe := min(192*16, len(p.data))
	pktSize, firstPkt := detectTSPacketSize(p.data[:probe])
	if pktSize == 0 {
		return fmt.Errorf("cannot detect TS packet size")
	}
	p.packetSize = pktSize
	if pktSize == 192 {
		// 192-byte M2TS packets carry a 4-byte prefix before the TS sync byte.
		p.tsOffset = 4
	}
	// Step 2: locate the PAT/PMT within the first 2 MiB to learn stream PIDs.
	psiLen := min(2*1024*1024, len(p.data))
	if err := p.parsePATandPMT(p.data[:psiLen], firstPkt); err != nil {
		return fmt.Errorf("parse PAT/PMT: %w", err)
	}
	// Step 3: walk every TS packet and record PES payload ranges.
	st := p.initScanState()
	p.scanPackets(p.data, firstPkt, 0, st, progress)
	if progress != nil {
		progress(p.size, p.size)
	}
	return p.finalizeParse()
}
// parseMultiRegion handles parsing when data comes from multiple non-contiguous
// mmap regions. Processes each region sequentially, handling TS packets that
// straddle region boundaries via a small carryover buffer.
//
// Packet size detection and PAT/PMT parsing only look at the first region;
// this assumes the PSI tables live near the start of the stream (same
// assumption the contiguous path makes with its 2 MiB scan window).
func (p *MPEGTSParser) parseMultiRegion(progress MPEGTSProgressFunc) error {
	mr := p.multiRegion
	if len(mr.regions) == 0 {
		return fmt.Errorf("no regions in multi-region data")
	}
	// Step 1: Detect TS packet size from first region
	firstRegion := mr.regions[0].data
	detectLen := 192 * 16
	if detectLen > len(firstRegion) {
		detectLen = len(firstRegion)
	}
	packetSize, startOffset := detectTSPacketSize(firstRegion[:detectLen])
	if packetSize == 0 {
		return fmt.Errorf("cannot detect TS packet size")
	}
	p.packetSize = packetSize
	if packetSize == 192 {
		// M2TS: 4-byte prefix before the TS sync byte in each packet.
		p.tsOffset = 4
	}
	// Step 2: Parse PAT/PMT from first region
	scanLen := 2 * 1024 * 1024
	if scanLen > len(firstRegion) {
		scanLen = len(firstRegion)
	}
	if err := p.parsePATandPMT(firstRegion[:scanLen], startOffset); err != nil {
		return fmt.Errorf("parse PAT/PMT: %w", err)
	}
	// Step 3: Scan packets across all regions
	ss := p.initScanState()
	var carryover []byte // partial TS packet left over at a region boundary
	for i, reg := range mr.regions {
		chunk := reg.data
		logicalBase := reg.logicalStart
		chunkStart := 0
		if i == 0 {
			// First region: skip to the initial start offset
			chunkStart = startOffset
		}
		// Handle carryover from previous region boundary
		if len(carryover) > 0 {
			needed := p.packetSize - len(carryover)
			if needed <= len(chunk) {
				// Assemble the straddling packet and process it.
				// bridgeBase backdates the logical offset so range FileOffsets
				// computed inside scanPackets land on the packet's true start.
				bridgePkt := make([]byte, p.packetSize)
				copy(bridgePkt, carryover)
				copy(bridgePkt[len(carryover):], chunk[:needed])
				bridgeBase := logicalBase - int64(len(carryover))
				p.scanPackets(bridgePkt, 0, bridgeBase, ss, nil)
				chunkStart = needed
				carryover = nil
			} else {
				// Region too small to complete the packet — accumulate and continue
				carryover = append(carryover, chunk...)
				continue
			}
		}
		// Process complete packets in this region
		available := len(chunk) - chunkStart
		nComplete := (available / p.packetSize) * p.packetSize
		if nComplete > 0 {
			p.scanPackets(chunk[chunkStart:chunkStart+nComplete], 0, logicalBase+int64(chunkStart), ss, progress)
		}
		// Save any remainder for the next region
		remainder := available - nComplete
		if remainder > 0 {
			carryover = make([]byte, remainder)
			copy(carryover, chunk[chunkStart+nComplete:])
		}
	}
	if len(carryover) > 0 {
		log.Printf("mpegts: warning: discarding %d carryover bytes at end of multi-region data (incomplete TS packet)", len(carryover))
	}
	if progress != nil {
		progress(p.size, p.size)
	}
	return p.finalizeParse()
}
// pesState tracks PES header parsing state across TS packets.
type pesState struct {
	// headerBytesRemaining is the count of PES header bytes still to be
	// skipped when a PES header extends into the following TS packet(s).
	// Zero means the next continuation packet is pure ES payload.
	headerBytesRemaining int
}
// scanState holds mutable state for the packet scanning loop.
type scanState struct {
	// trackedPIDs is the set of PIDs (video + audio/subtitle) to record.
	trackedPIDs map[uint16]bool
	// pesStates tracks per-PID PES header continuation state.
	pesStates map[uint16]*pesState
	// videoESOffset is the running elementary-stream offset for video ranges.
	videoESOffset int64
	// audioESOffsets is the running ES offset per audio sub-stream ID.
	audioESOffsets map[byte]int64
	// lastProgress is the logical position at the last progress callback,
	// used to throttle reporting.
	lastProgress int64
}
// initScanState sets up PID tracking and PES state for scanning.
// Returns nil when no stream PIDs were discovered, which makes
// scanPackets a no-op.
func (p *MPEGTSParser) initScanState() *scanState {
	if p.videoPID == 0 && len(p.audioPIDs) == 0 {
		return nil
	}
	tracked := make(map[uint16]bool)
	if p.videoPID != 0 {
		tracked[p.videoPID] = true
	}
	for _, pid := range p.audioPIDs {
		tracked[pid] = true
	}
	// Pre-size the range slices from a rough packet count so the scan loop
	// avoids repeated reallocation on large files.
	pktEstimate := int(p.size) / p.packetSize
	if p.videoPID != 0 {
		p.videoRanges = make([]PESPayloadRange, 0, pktEstimate*7/10)
	}
	for _, pid := range p.audioPIDs {
		sub := p.pidToSubStream[pid]
		p.audioBySubStream[sub] = make([]PESPayloadRange, 0, pktEstimate/10/len(p.audioPIDs))
	}
	states := make(map[uint16]*pesState, len(tracked))
	for pid := range tracked {
		states[pid] = &pesState{}
	}
	return &scanState{
		trackedPIDs:    tracked,
		pesStates:      states,
		audioESOffsets: make(map[byte]int64),
	}
}
// scanPackets processes TS packets in a data buffer, recording PES payload ranges.
// logicalBase is added to all FileOffset values to produce logical (assembled) offsets.
//
// For each tracked PID the function distinguishes packets that begin a new PES
// packet (PUSI set) — where the PES header must be skipped — from continuation
// packets, where any header bytes left over from a previous packet are skipped
// via pesState before the remainder is recorded as ES payload.
func (p *MPEGTSParser) scanPackets(data []byte, startPos int, logicalBase int64, ss *scanState, progress MPEGTSProgressFunc) {
	// A nil scan state means initScanState found no PIDs — nothing to do.
	if ss == nil {
		return
	}
	for pos := startPos; pos+p.packetSize <= len(data); pos += p.packetSize {
		tsStart := pos + p.tsOffset
		// Require the TS sync byte; otherwise skip this packet slot.
		if tsStart >= len(data) || data[tsStart] != 0x47 {
			continue
		}
		// PID is the low 13 bits of bytes 1-2 of the TS header.
		pid := uint16(data[tsStart+1]&0x1F)<<8 | uint16(data[tsStart+2])
		if !ss.trackedPIDs[pid] {
			continue
		}
		// PUSI (payload_unit_start_indicator): a new PES packet begins here.
		pusi := data[tsStart+1]&0x40 != 0
		adaptFieldCtrl := (data[tsStart+3] >> 4) & 0x03
		// Find payload start
		payloadOff := tsStart + 4
		switch adaptFieldCtrl {
		case 0x01: // payload only
		case 0x03: // adaptation field + payload
			if payloadOff < pos+p.packetSize {
				adaptLen := int(data[payloadOff])
				payloadOff += 1 + adaptLen
			}
		default: // 0x02 = adaptation only, 0x00 = reserved
			continue
		}
		payloadEnd := pos + p.packetSize
		if payloadEnd > len(data) {
			payloadEnd = len(data)
		}
		if payloadOff >= payloadEnd {
			// Adaptation field consumed the whole packet.
			continue
		}
		payload := data[payloadOff:payloadEnd]
		state := ss.pesStates[pid]
		// File offset in the logical (assembled) coordinate space
		logPayloadOff := logicalBase + int64(payloadOff)
		if pusi {
			// New PES packet starts here; require the 00 00 01 start code.
			if len(payload) < 9 || payload[0] != 0 || payload[1] != 0 || payload[2] != 1 {
				continue
			}
			// Byte 8 is PES_header_data_length; full header is 9 + that.
			pesHeaderDataLen := int(payload[8])
			pesHeaderSize := 9 + pesHeaderDataLen
			if pesHeaderSize >= len(payload) {
				// Header continues into the next packet — remember how much.
				state.headerBytesRemaining = pesHeaderSize - len(payload)
				continue
			}
			esPayload := payload[pesHeaderSize:]
			fileOffset := logPayloadOff + int64(pesHeaderSize)
			if pid == p.videoPID {
				p.videoRanges = append(p.videoRanges, PESPayloadRange{
					FileOffset: fileOffset,
					Size:       len(esPayload),
					ESOffset:   ss.videoESOffset,
				})
				ss.videoESOffset += int64(len(esPayload))
			} else {
				subID := p.pidToSubStream[pid]
				p.audioBySubStream[subID] = append(p.audioBySubStream[subID], PESPayloadRange{
					FileOffset: fileOffset,
					Size:       len(esPayload),
					ESOffset:   ss.audioESOffsets[subID],
				})
				ss.audioESOffsets[subID] += int64(len(esPayload))
			}
			state.headerBytesRemaining = 0
		} else {
			// Continuation packet
			esPayload := payload
			fileOffset := logPayloadOff
			if state.headerBytesRemaining > 0 {
				// Still skipping PES header bytes from the previous packet.
				if state.headerBytesRemaining >= len(esPayload) {
					state.headerBytesRemaining -= len(esPayload)
					continue
				}
				esPayload = esPayload[state.headerBytesRemaining:]
				fileOffset += int64(state.headerBytesRemaining)
				state.headerBytesRemaining = 0
			}
			if len(esPayload) == 0 {
				continue
			}
			if pid == p.videoPID {
				p.videoRanges = append(p.videoRanges, PESPayloadRange{
					FileOffset: fileOffset,
					Size:       len(esPayload),
					ESOffset:   ss.videoESOffset,
				})
				ss.videoESOffset += int64(len(esPayload))
			} else {
				subID := p.pidToSubStream[pid]
				p.audioBySubStream[subID] = append(p.audioBySubStream[subID], PESPayloadRange{
					FileOffset: fileOffset,
					Size:       len(esPayload),
					ESOffset:   ss.audioESOffsets[subID],
				})
				ss.audioESOffsets[subID] += int64(len(esPayload))
			}
		}
		// Report progress at most every 100 MiB of logical position.
		logPos := logicalBase + int64(pos)
		if progress != nil && logPos-ss.lastProgress > 100*1024*1024 {
			progress(logPos, p.size)
			ss.lastProgress = logPos
		}
	}
}
// finalizeParse performs post-scan processing: video range filtering and
// TrueHD+AC3 stream splitting. Shared by contiguous and multi-region paths.
func (p *MPEGTSParser) finalizeParse() error {
	// A stream with no usable PIDs means the PMT yielded nothing to extract.
	if p.videoPID == 0 && len(p.audioPIDs) == 0 {
		return fmt.Errorf("no video or audio PIDs found in PMT")
	}
	if err := p.buildFilteredVideoRanges(); err != nil {
		return fmt.Errorf("build filtered video ranges: %w", err)
	}
	// NOTE(review): presumably signals downstream consumers to use the
	// filtered (user_data-stripped) video ranges — confirm with callers.
	p.filterUserData = true
	// Split combined audio streams into their separate sub-streams.
	p.splitTrueHDAC3Streams()
	p.splitDTSHDCoreStreams()
	return nil
}
// parsePATandPMT finds the PAT and PMT in the first portion of the file
// and extracts video/audio PIDs and stream types.
//
// Only the first video stream is kept; every recognized audio/subtitle
// stream is assigned a sequential sub-stream ID in PMT order.
func (p *MPEGTSParser) parsePATandPMT(data []byte, startOffset int) error {
	// Find PAT (PID 0) and extract PMT PID
	patSection, err := reassemblePSISection(data, startOffset, p.packetSize, p.tsOffset, 0, 0x00)
	if err != nil {
		return fmt.Errorf("reassemble PAT: %w", err)
	}
	pmtPID := pmtPIDFromPAT(patSection)
	if pmtPID == 0 {
		return fmt.Errorf("PMT PID not found in PAT")
	}
	// Find PMT and extract stream types.
	// PMT sections can span multiple TS packets, so we must reassemble.
	pmtSection, err := reassemblePSISection(data, startOffset, p.packetSize, p.tsOffset, pmtPID, 0x02)
	if err != nil {
		return fmt.Errorf("reassemble PMT: %w", err)
	}
	if len(pmtSection) >= 12 {
		// program_info_length sits at bytes 10-11; the ES stream loop
		// starts right after the program descriptors.
		progInfoLen := int(pmtSection[10]&0x0F)<<8 | int(pmtSection[11])
		streamsStart := 12 + progInfoLen
		sectionLen := int(pmtSection[1]&0x0F)<<8 | int(pmtSection[2])
		streamsEnd := 3 + sectionLen - 4 // exclude CRC32
		if streamsEnd > len(pmtSection) {
			streamsEnd = len(pmtSection)
		}
		var subStreamSeq byte
		// Each stream entry: type (1) + PID (2) + ES_info_length (2) + descriptors.
		for j := streamsStart; j+5 <= streamsEnd; {
			streamType := pmtSection[j]
			esPID := uint16(pmtSection[j+1]&0x1F)<<8 | uint16(pmtSection[j+2])
			esInfoLen := int(pmtSection[j+3]&0x0F)<<8 | int(pmtSection[j+4])
			ct := tsStreamTypeToCodecType(streamType)
			if ct != CodecUnknown {
				if IsVideoCodec(ct) && p.videoPID == 0 {
					p.videoPID = esPID
					p.videoCodec = ct
				} else if IsAudioCodec(ct) || IsSubtitleCodec(ct) {
					p.audioPIDs = append(p.audioPIDs, esPID)
					p.pidToSubStream[esPID] = subStreamSeq
					p.subStreamToPID[subStreamSeq] = esPID
					p.subStreamCodec[subStreamSeq] = ct
					p.audioSubStreams = append(p.audioSubStreams, subStreamSeq)
					subStreamSeq++
				}
			}
			// Advance past the descriptors; bail on overflow or a bogus
			// ES_info_length that would run past the section.
			next := j + 5 + esInfoLen
			if next < j || next > streamsEnd {
				break
			}
			j = next
		}
	}
	return nil
}
// reassemblePSISection collects a complete PSI section (PAT, PMT, etc.) from
// one or more TS packets. packetSize is 188 (standard TS) or 192 (M2TS).
// tsOffset is the offset from packet start to TS sync byte (4 for M2TS, 0 for TS).
//
// A section may start mid-packet (pointer_field) and span several packets;
// the function accumulates payload bytes until section_length bytes have been
// collected, or returns an error if the section never starts or is truncated.
func reassemblePSISection(data []byte, startOffset, packetSize, tsOffset int, targetPID uint16, tableID byte) ([]byte, error) {
	var section []byte
	sectionLen := -1    // total section size (3 header bytes + section_length)
	collecting := false // true once the section header has been seen
	for i := startOffset; i+packetSize <= len(data); i += packetSize {
		tsStart := i + tsOffset
		if tsStart+188 > len(data) || data[tsStart] != 0x47 {
			continue
		}
		pid := uint16(data[tsStart+1]&0x1F)<<8 | uint16(data[tsStart+2])
		if pid != targetPID {
			continue
		}
		pusi := data[tsStart+1]&0x40 != 0
		adaptFieldCtrl := (data[tsStart+3] >> 4) & 0x03
		hdrLen := 4
		switch adaptFieldCtrl {
		case 0x02: // Adaptation field only, no payload
			continue
		case 0x03: // Adaptation field + payload
			if tsStart+4 >= len(data) {
				continue
			}
			hdrLen = 5 + int(data[tsStart+4])
		case 0x01: // Payload only
		default:
			continue
		}
		if tsStart+hdrLen >= tsStart+188 {
			// Adaptation field consumed the whole packet.
			continue
		}
		payload := data[tsStart+hdrLen : tsStart+188]
		if pusi {
			// PUSI packet: pointer_field indicates how many bytes at the start
			// of the payload belong to the tail of a previous section.
			pointerField := int(payload[0])
			sectionStart := 1 + pointerField
			if sectionStart > len(payload) {
				continue
			}
			// If we're mid-collection, the bytes before sectionStart are the
			// tail of the section we're assembling.
			if collecting && pointerField > 0 {
				tail := payload[1:sectionStart]
				remaining := sectionLen - len(section)
				if len(tail) > remaining {
					tail = tail[:remaining]
				}
				section = append(section, tail...)
				if len(section) >= sectionLen {
					return section, nil
				}
			}
			payload = payload[sectionStart:]
			// A new section must start here with the requested table_id.
			if len(payload) < 3 || payload[0] != tableID {
				continue
			}
			// Total size = 3 header bytes + section_length (12-bit field).
			sectionLen = 3 + (int(payload[1]&0x0F)<<8 | int(payload[2]))
			section = make([]byte, 0, sectionLen)
			collecting = true
			// Append what we have from this packet
			n := len(payload)
			if n > sectionLen {
				n = sectionLen
			}
			section = append(section, payload[:n]...)
		} else if collecting {
			// Continuation packet
			remaining := sectionLen - len(section)
			n := len(payload)
			if n > remaining {
				n = remaining
			}
			section = append(section, payload[:n]...)
		}
		if collecting && len(section) >= sectionLen {
			return section, nil
		}
	}
	if collecting {
		return nil, fmt.Errorf("truncated PSI section for table ID 0x%02X on PID 0x%04X: got %d of %d bytes", tableID, targetPID, len(section), sectionLen)
	}
	return nil, fmt.Errorf("PSI section with table ID 0x%02X not found on PID 0x%04X", tableID, targetPID)
}
// pmtPIDFromPAT extracts the PMT PID from a reassembled PAT section.
// Returns the PID of the first non-zero program, or 0 if none found.
func pmtPIDFromPAT(patSection []byte) uint16 {
	// Need at least the 8-byte PAT header before any program entries.
	if len(patSection) < 8 {
		return 0
	}
	sectionLen := int(patSection[1]&0x0F)<<8 | int(patSection[2])
	// section_length counts from byte 3; the final 4 bytes are the CRC32.
	end := 3 + sectionLen - 4
	if end > len(patSection) {
		end = len(patSection)
	}
	// Program loop: 4 bytes per entry (program_number, PID), starting at byte 8.
	for off := 8; off+4 <= end; off += 4 {
		num := uint16(patSection[off])<<8 | uint16(patSection[off+1])
		if num != 0 {
			return uint16(patSection[off+2]&0x1F)<<8 | uint16(patSection[off+3])
		}
		// program_number 0 designates the network PID — keep looking.
	}
	return 0
}
// buildFilteredVideoRanges creates filtered video ranges.
// For MPEG-2 video, this excludes user_data (00 00 01 B2) sections.
// For H.264/H.265, filtered ranges are the same as raw ranges (no filtering needed).
func (p *MPEGTSParser) buildFilteredVideoRanges() error {
	if len(p.videoRanges) == 0 {
		return nil
	}
	// Only MPEG-2 needs user_data filtering
	if p.videoCodec != CodecMPEG2Video {
		// For H.264/H.265/etc, no filtering needed — use raw ranges directly
		p.filteredVideoRanges = p.videoRanges
		return nil
	}
	// MPEG-2: scan for user_data sections and exclude them
	// Same algorithm as MPEGPSParser.buildFilteredVideoRanges
	filteredRanges := make([]PESPayloadRange, 0, len(p.videoRanges))
	var filteredESOffset int64
	for _, rawRange := range p.videoRanges {
		endOffset := rawRange.FileOffset + int64(rawRange.Size)
		if endOffset > p.size {
			continue
		}
		data := p.dataSlice(rawRange.FileOffset, endOffset)
		// Search by hunting the 0x01 byte of each 00 00 01 start code.
		i := 2
		rangeStart := 0 // start of the current kept (non-user_data) span
		for i < len(data)-1 {
			idx := bytes.IndexByte(data[i:], 0x01)
			if idx < 0 {
				break
			}
			pos := i + idx
			// A user_data start code is 00 00 01 followed by UserDataStartCode.
			if pos >= 2 && pos < len(data)-1 &&
				data[pos-1] == 0x00 && data[pos-2] == 0x00 && data[pos+1] == UserDataStartCode {
				startCodePos := pos - 2
				// Emit the kept bytes preceding the user_data section.
				if startCodePos > rangeStart {
					filteredRanges = append(filteredRanges, PESPayloadRange{
						FileOffset: rawRange.FileOffset + int64(rangeStart),
						Size:       startCodePos - rangeStart,
						ESOffset:   filteredESOffset,
					})
					filteredESOffset += int64(startCodePos - rangeStart)
				}
				// Skip forward to the next start code, which terminates the
				// user_data section.
				i = pos + 2
				for i < len(data)-1 {
					idx := bytes.IndexByte(data[i:], 0x01)
					if idx < 0 {
						i = len(data)
						break
					}
					nextPos := i + idx
					if nextPos >= 2 && data[nextPos-1] == 0x00 && data[nextPos-2] == 0x00 {
						i = nextPos - 2
						break
					}
					i = nextPos + 1
				}
				rangeStart = i
			} else {
				i = pos + 1
			}
		}
		// Emit any kept tail after the last user_data section.
		if rangeStart < len(data) {
			filteredRanges = append(filteredRanges, PESPayloadRange{
				FileOffset: rawRange.FileOffset + int64(rangeStart),
				Size:       len(data) - rangeStart,
				ESOffset:   filteredESOffset,
			})
			filteredESOffset += int64(len(data) - rangeStart)
		}
	}
	p.filteredVideoRanges = filteredRanges
	return nil
}
package source
// splitTrueHDAC3Streams detects combined TrueHD+AC3 audio streams and splits
// them into separate sub-streams. On Blu-ray, TrueHD streams (PMT type 0x83)
// interleave an AC3 compatibility core in the same PID. Video extraction tools split
// these into separate MKV tracks, so we must split them here to match.
func (p *MPEGTSParser) splitTrueHDAC3Streams() {
	// Snapshot the current sub-stream list; sub-streams appended during the
	// loop are AC3-only and never need splitting themselves.
	existing := p.audioSubStreams
	for _, subID := range existing {
		if p.subStreamCodec[subID] != CodecTrueHDAudio {
			continue
		}
		combined := p.audioBySubStream[subID]
		if len(combined) == 0 {
			continue
		}
		// Only split streams that really carry interleaved AC3.
		if !p.detectCombinedTrueHDAC3(combined) {
			continue
		}
		ac3, truehd := p.splitCombinedAudioRanges(combined)
		if len(ac3) == 0 {
			continue
		}
		// Coalesce contiguous entries to keep the range lists small.
		ac3 = mergeAdjacentRanges(ac3)
		truehd = mergeAdjacentRanges(truehd)
		// The original sub-stream keeps only the TrueHD portion; the AC3
		// core becomes a brand-new sub-stream with the next free ID.
		p.audioBySubStream[subID] = truehd
		freshID := byte(len(p.audioSubStreams))
		p.audioBySubStream[freshID] = ac3
		p.subStreamCodec[freshID] = CodecAC3Audio
		p.audioSubStreams = append(p.audioSubStreams, freshID)
	}
}
// detectCombinedTrueHDAC3 checks if a TrueHD audio stream contains interleaved
// AC3 frames by scanning the first few KB of ES data for both sync patterns.
func (p *MPEGTSParser) detectCombinedTrueHDAC3(ranges []PESPayloadRange) bool {
	const budget = 16 * 1024 // inspect at most the first 16KB of ES data
	foundAC3 := false
	foundTrueHD := false
	scanned := 0
	for _, r := range ranges {
		if scanned >= budget {
			break
		}
		end := r.FileOffset + int64(r.Size)
		if end > p.size {
			continue
		}
		chunk := p.dataSlice(r.FileOffset, end)
		// Trim the chunk to whatever budget remains.
		if left := budget - scanned; left < len(chunk) {
			chunk = chunk[:left]
		}
		for i := 0; i < len(chunk)-1; i++ {
			// AC3 sync word: 0B 77.
			if chunk[i] == 0x0B && chunk[i+1] == 0x77 {
				foundAC3 = true
			}
			// TrueHD major sync: F8 72 6F BA.
			if i+3 < len(chunk) &&
				chunk[i] == 0xF8 && chunk[i+1] == 0x72 &&
				chunk[i+2] == 0x6F && chunk[i+3] == 0xBA {
				foundTrueHD = true
			}
			if foundAC3 && foundTrueHD {
				return true
			}
		}
		scanned += len(chunk)
	}
	return false
}
// splitCombinedAudioRanges splits PES payload ranges of a combined TrueHD+AC3
// stream into separate AC3 and TrueHD ranges using AU-aware parsing.
//
// The interleaved stream alternates between AC3 frames and TrueHD access units
// at unit boundaries. At each boundary, the parser checks for the AC3 sync word
// (0B 77) to identify AC3 frames, or reads the TrueHD AU length header to
// determine the AU size. This avoids false-positive AC3 detection inside TrueHD
// AU data, which the previous byte-scan approach was susceptible to.
//
// Unit headers may straddle PES range boundaries, so up to 5 bytes are
// buffered in headerBuf (with their source ranges tracked in pendingRanges)
// until enough bytes are available to classify the unit.
func (p *MPEGTSParser) splitCombinedAudioRanges(ranges []PESPayloadRange) (ac3Ranges, truehdRanges []PESPayloadRange) {
	var ac3ES, truehdES int64 // cumulative ES offsets for each output stream
	ac3Remaining := 0         // bytes remaining in current AC3 frame
	truehdRemaining := 0      // bytes remaining in current TrueHD AU
	// Cross-boundary header buffer. At unit boundaries, we need 2 bytes
	// to determine type (AC3 vs TrueHD), or 5 bytes if starting with
	// AC3 sync 0B 77 (to read fscod+frmsizecod at byte 4).
	var headerBuf [5]byte
	headerBufLen := 0
	type pendingRange struct {
		fileOffset int64
		size       int
	}
	var pendingRanges []pendingRange
	for _, r := range ranges {
		endOffset := r.FileOffset + int64(r.Size)
		if endOffset > p.size {
			continue
		}
		data := p.dataSlice(r.FileOffset, endOffset)
		pos := 0
		// Resolve buffered header bytes from previous range
		if headerBufLen > 0 && ac3Remaining == 0 && truehdRemaining == 0 {
			// Determine how many total bytes we need
			needTotal := 2
			if headerBufLen >= 2 && headerBuf[0] == 0x0B && headerBuf[1] == 0x77 {
				needTotal = 5
			}
			need := needTotal - headerBufLen
			available := len(data) - pos
			if need > available {
				// Still not enough data — buffer more
				copy(headerBuf[headerBufLen:], data[pos:])
				headerBufLen += available
				pendingRanges = append(pendingRanges, pendingRange{r.FileOffset + int64(pos), available})
				continue
			}
			copy(headerBuf[headerBufLen:], data[pos:pos+need])
			consumedFromCurrent := need
			headerBufLen += need
			// Re-check: we may now have 0B 77 and need more bytes
			if headerBufLen >= 2 && headerBuf[0] == 0x0B && headerBuf[1] == 0x77 && headerBufLen < 5 {
				moreNeed := 5 - headerBufLen
				moreAvail := len(data) - pos - consumedFromCurrent
				if moreNeed > moreAvail {
					// Still not enough for full AC3 header
					copy(headerBuf[headerBufLen:], data[pos+consumedFromCurrent:])
					pendingRanges = append(pendingRanges, pendingRange{r.FileOffset + int64(pos), consumedFromCurrent + moreAvail})
					headerBufLen += moreAvail
					continue
				}
				copy(headerBuf[headerBufLen:], data[pos+consumedFromCurrent:pos+consumedFromCurrent+moreNeed])
				consumedFromCurrent += moreNeed
				headerBufLen += moreNeed
			}
			// Classify the unit: AC3 if the sync + frame-size code validate,
			// otherwise try the TrueHD AU length header.
			isAC3 := false
			unitSize := 0
			if headerBuf[0] == 0x0B && headerBuf[1] == 0x77 && headerBufLen >= 5 {
				fscod := (headerBuf[4] >> 6) & 0x03
				frmsizecod := headerBuf[4] & 0x3F
				frameSize := AC3FrameSize(fscod, frmsizecod)
				if frameSize > 0 {
					isAC3 = true
					unitSize = frameSize
				}
			}
			if !isAC3 {
				auLen := ParseTrueHDAULength(headerBuf[:2])
				if auLen >= 4 {
					unitSize = auLen
				}
			}
			if unitSize > 0 {
				// Attribute pending ranges + consumed bytes from current range
				if isAC3 {
					for _, pr := range pendingRanges {
						ac3Ranges = append(ac3Ranges, PESPayloadRange{
							FileOffset: pr.fileOffset,
							Size:       pr.size,
							ESOffset:   ac3ES,
						})
						ac3ES += int64(pr.size)
					}
					if consumedFromCurrent > 0 {
						ac3Ranges = append(ac3Ranges, PESPayloadRange{
							FileOffset: r.FileOffset + int64(pos),
							Size:       consumedFromCurrent,
							ESOffset:   ac3ES,
						})
						ac3ES += int64(consumedFromCurrent)
					}
					// headerBufLen bytes of this unit are already attributed.
					ac3Remaining = unitSize - headerBufLen
				} else {
					for _, pr := range pendingRanges {
						truehdRanges = append(truehdRanges, PESPayloadRange{
							FileOffset: pr.fileOffset,
							Size:       pr.size,
							ESOffset:   truehdES,
						})
						truehdES += int64(pr.size)
					}
					if consumedFromCurrent > 0 {
						truehdRanges = append(truehdRanges, PESPayloadRange{
							FileOffset: r.FileOffset + int64(pos),
							Size:       consumedFromCurrent,
							ESOffset:   truehdES,
						})
						truehdES += int64(consumedFromCurrent)
					}
					truehdRemaining = unitSize - headerBufLen
				}
			} else {
				// Unrecognized — attribute all buffered bytes to TrueHD
				for _, pr := range pendingRanges {
					truehdRanges = append(truehdRanges, PESPayloadRange{
						FileOffset: pr.fileOffset,
						Size:       pr.size,
						ESOffset:   truehdES,
					})
					truehdES += int64(pr.size)
				}
				if consumedFromCurrent > 0 {
					truehdRanges = append(truehdRanges, PESPayloadRange{
						FileOffset: r.FileOffset + int64(pos),
						Size:       consumedFromCurrent,
						ESOffset:   truehdES,
					})
					truehdES += int64(consumedFromCurrent)
				}
			}
			pos += consumedFromCurrent
			headerBufLen = 0
			pendingRanges = nil
		}
		for pos < len(data) {
			if ac3Remaining > 0 {
				// Inside an AC3 frame — consume up to the end of this range.
				consume := min(ac3Remaining, len(data)-pos)
				ac3Ranges = append(ac3Ranges, PESPayloadRange{
					FileOffset: r.FileOffset + int64(pos),
					Size:       consume,
					ESOffset:   ac3ES,
				})
				ac3ES += int64(consume)
				ac3Remaining -= consume
				pos += consume
				continue
			}
			if truehdRemaining > 0 {
				// Inside a TrueHD AU — consume up to the end of this range.
				consume := min(truehdRemaining, len(data)-pos)
				truehdRanges = append(truehdRanges, PESPayloadRange{
					FileOffset: r.FileOffset + int64(pos),
					Size:       consume,
					ESOffset:   truehdES,
				})
				truehdES += int64(consume)
				truehdRemaining -= consume
				pos += consume
				continue
			}
			// At unit boundary — determine type
			available := len(data) - pos
			// Need at least 2 bytes to determine type
			if available < 2 {
				copy(headerBuf[:], data[pos:])
				headerBufLen = available
				pendingRanges = []pendingRange{{r.FileOffset + int64(pos), available}}
				pos = len(data)
				continue
			}
			// Check for AC3 sync word
			if data[pos] == 0x0B && data[pos+1] == 0x77 {
				if available < 5 {
					// Need more bytes for AC3 header
					copy(headerBuf[:], data[pos:pos+available])
					headerBufLen = available
					pendingRanges = []pendingRange{{r.FileOffset + int64(pos), available}}
					pos = len(data)
					continue
				}
				fscod := (data[pos+4] >> 6) & 0x03
				frmsizecod := data[pos+4] & 0x3F
				frameSize := AC3FrameSize(fscod, frmsizecod)
				if frameSize > 0 {
					ac3Remaining = frameSize
					continue
				}
			}
			// TrueHD AU: parse length from first 2 bytes
			auLen := ParseTrueHDAULength(data[pos:])
			if auLen >= 4 {
				truehdRemaining = auLen
				continue
			}
			// Unrecognized — consume byte-by-byte as TrueHD
			truehdRanges = append(truehdRanges, PESPayloadRange{
				FileOffset: r.FileOffset + int64(pos),
				Size:       1,
				ESOffset:   truehdES,
			})
			truehdES++
			pos++
		}
	}
	// Attribute remaining buffered bytes to TrueHD
	if headerBufLen > 0 {
		for _, pr := range pendingRanges {
			truehdRanges = append(truehdRanges, PESPayloadRange{
				FileOffset: pr.fileOffset,
				Size:       pr.size,
				ESOffset:   truehdES,
			})
			truehdES += int64(pr.size)
		}
	}
	return ac3Ranges, truehdRanges
}
package source
// mergeAdjacentRanges merges consecutive PESPayloadRange entries that are
// contiguous in both file offset and ES offset.
func mergeAdjacentRanges(ranges []PESPayloadRange) []PESPayloadRange {
	if len(ranges) <= 1 {
		return ranges
	}
	out := make([]PESPayloadRange, 0, len(ranges)/2)
	out = append(out, ranges[0])
	for _, cur := range ranges[1:] {
		tail := &out[len(out)-1]
		fileContig := cur.FileOffset == tail.FileOffset+int64(tail.Size)
		esContig := cur.ESOffset == tail.ESOffset+int64(tail.Size)
		if fileContig && esContig {
			// Extend the previous run instead of emitting a new entry.
			tail.Size += cur.Size
		} else {
			out = append(out, cur)
		}
	}
	return out
}
package source
import (
"sort"
"sync/atomic"
)
// multiRegionData provides a virtual contiguous view over multiple
// non-contiguous byte slices from a memory-mapped ISO. Used for
// multi-extent UDF files where M2TS data is split across
// non-contiguous ISO regions.
type multiRegionData struct {
	// regions are ordered by logicalStart and together form the
	// virtual contiguous byte stream.
	regions []multiRegion
	// totalSize is the sum of all region lengths (logical bytes).
	totalSize int64
	lastIdx   atomic.Int32 // cached region index for fast sequential access
}

// multiRegion is one contiguous piece of the virtual stream.
type multiRegion struct {
	data         []byte
	logicalStart int64 // cumulative offset in the virtual contiguous view
}
// newMultiRegionData creates a multiRegionData from ISO physical extents.
// Each extent becomes a region backed by a sub-slice of isoData (zero-copy).
//
// Extents with out-of-range or inverted/negative bounds (corrupted or
// malformed UDF metadata) are clamped to the ISO's limits. An extent that
// clamps to nothing becomes an empty placeholder region so region indices
// stay aligned with the input extents.
func newMultiRegionData(extents []isoPhysicalRange, isoData []byte) *multiRegionData {
	mr := &multiRegionData{
		regions: make([]multiRegion, len(extents)),
	}
	isoLen := int64(len(isoData))
	logicalOff := int64(0)
	for i, ext := range extents {
		// Clamp the extent to [0, isoLen). This single path also neutralizes
		// a negative ext.Length (end < start), which previously passed the
		// in-bounds check and caused an out-of-range slice panic on
		// isoData[ext.ISOOffset:end].
		start := ext.ISOOffset
		end := ext.ISOOffset + ext.Length
		if start < 0 {
			start = 0
		}
		if end > isoLen {
			end = isoLen
		}
		if start >= end {
			// Degenerate extent: placeholder region with no data.
			mr.regions[i] = multiRegion{logicalStart: logicalOff}
			continue
		}
		mr.regions[i] = multiRegion{
			data:         isoData[start:end],
			logicalStart: logicalOff,
		}
		logicalOff += end - start
	}
	mr.totalSize = logicalOff
	return mr
}
// Len returns the total logical size across all regions, in bytes.
func (m *multiRegionData) Len() int64 { return m.totalSize }
// regionFor returns the index of the region containing the given logical offset.
// Returns len(m.regions) if the offset is beyond all regions.
func (m *multiRegionData) regionFor(off int64) int {
	// Fast path: sequential callers usually stay within the same region,
	// so try the atomically cached index before searching.
	if ci := int(m.lastIdx.Load()); ci < len(m.regions) {
		reg := m.regions[ci]
		regEnd := reg.logicalStart + int64(len(reg.data))
		if off >= reg.logicalStart && off < regEnd {
			return ci
		}
	}
	// Slow path: binary search for the first region whose end exceeds off.
	found := sort.Search(len(m.regions), func(i int) bool {
		return m.regions[i].logicalStart+int64(len(m.regions[i].data)) > off
	})
	if found < len(m.regions) {
		m.lastIdx.Store(int32(found))
	}
	return found
}
// ByteAt returns the byte at the given logical offset.
// Returns 0 if the offset is out of bounds.
func (m *multiRegionData) ByteAt(off int64) byte {
	if off < 0 || off >= m.totalSize {
		return 0
	}
	ri := m.regionFor(off)
	if ri >= len(m.regions) {
		return 0
	}
	reg := m.regions[ri]
	return reg.data[off-reg.logicalStart]
}
// Slice returns the bytes in the logical range [off, end).
// A range contained in a single region comes back as a zero-copy
// sub-slice; a range crossing a region boundary is assembled into a
// freshly allocated buffer.
func (m *multiRegionData) Slice(off, end int64) []byte {
	if off < 0 || end < 0 || end <= off {
		return nil
	}
	first := m.regionFor(off)
	if first >= len(m.regions) {
		return nil
	}
	head := m.regions[first]
	lo := off - head.logicalStart
	hi := end - head.logicalStart
	if hi <= int64(len(head.data)) {
		// Fast path: the whole range lives in one region (zero-copy).
		return head.data[lo:hi]
	}
	// Slow path: gather pieces from successive regions into one buffer.
	want := int(end - off)
	out := make([]byte, want)
	n := copy(out, head.data[lo:])
	for j := first + 1; j < len(m.regions) && n < want; j++ {
		n += copy(out[n:], m.regions[j].data)
	}
	return out
}
// Package source provides functionality for indexing source media files (DVD ISOs, Blu-ray directories).
package source
import (
"errors"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"github.com/stuckj/mkvdup/internal/mmap"
)
// Type represents the type of source media.
type Type int

// Source type constants.
const (
	TypeDVD    Type = iota // Contains .iso file
	TypeBluray             // Contains BDMV/STREAM/*.m2ts
)

// String returns a human-readable name for the source type.
// Unrecognized values render as "Unknown".
func (t Type) String() string {
	if t == TypeDVD {
		return "DVD"
	}
	if t == TypeBluray {
		return "Blu-ray"
	}
	return "Unknown"
}
// ErrUnknownSourceType is returned when the source directory type cannot be
// determined: it contains neither an ISO image nor a BDMV directory tree.
var ErrUnknownSourceType = errors.New("unknown source type: directory contains neither ISO nor BDMV structure")
// DetectType determines whether a directory contains a DVD ISO or Blu-ray structure.
// ISOs are inspected to determine if they contain DVD (VIDEO_TS) or Blu-ray (BDMV) content.
func DetectType(dir string) (Type, error) {
	// Gather ISO images from the directory itself and one level down
	// (a nested subdirectory is a common rip layout).
	topISOs, err := filepath.Glob(filepath.Join(dir, "*.iso"))
	if err != nil {
		return 0, err
	}
	nestedISOs, err := filepath.Glob(filepath.Join(dir, "*", "*.iso"))
	if err != nil {
		return 0, err
	}
	allISOs := append(topISOs, nestedISOs...)
	if len(allISOs) > 0 {
		// Inspect the first ISO; if it cannot be read, fall back to the
		// legacy DVD default.
		t, err := detectISOType(allISOs[0])
		if err != nil {
			return TypeDVD, nil
		}
		return t, nil
	}
	// No ISOs: look for an extracted Blu-ray directory tree.
	streams, err := filepath.Glob(filepath.Join(dir, "BDMV", "STREAM", "*.m2ts"))
	if err != nil {
		return 0, err
	}
	if len(streams) > 0 {
		return TypeBluray, nil
	}
	return 0, ErrUnknownSourceType
}
// detectISOType examines an ISO file to determine if it's a DVD or Blu-ray.
// DVDs have VIDEO_TS directory, Blu-rays have BDMV directory.
// Uses minimal reads to avoid loading the entire ISO into memory.
func detectISOType(isoPath string) (Type, error) {
	f, err := os.Open(isoPath)
	if err != nil {
		return 0, err
	}
	defer f.Close()
	// ISO9660 primary volume descriptor is at sector 16 (2048 bytes per sector)
	// The root directory record is embedded in the volume descriptor at offset 156.
	const sectorSize = 2048
	const pvdOffset = 16 * sectorSize
	// Read the primary volume descriptor
	pvd := make([]byte, sectorSize)
	if _, err := f.ReadAt(pvd, pvdOffset); err != nil {
		return 0, err
	}
	// Check volume descriptor type (byte 0) and signature "CD001" (bytes 1-5)
	if pvd[0] != 1 || string(pvd[1:6]) != "CD001" {
		// No ISO9660 PVD. Check for UDF (Blu-ray ISOs from CloneBD).
		if isUDFImage(f) {
			return detectUDFISOType(f)
		}
		// Neither ISO9660 nor UDF: fall back to the legacy DVD default.
		return TypeDVD, nil
	}
	// Root directory record is at offset 156, length at byte 0 of the record
	rootDirRecord := pvd[156:]
	if len(rootDirRecord) < 34 {
		// A directory record is at least 34 bytes; a truncated one is
		// unparseable, so default to DVD.
		return TypeDVD, nil
	}
	// Extract root directory extent location (bytes 2-5, little-endian)
	rootExtent := uint32(rootDirRecord[2]) | uint32(rootDirRecord[3])<<8 |
		uint32(rootDirRecord[4])<<16 | uint32(rootDirRecord[5])<<24
	// Extract root directory data length (bytes 10-13, little-endian)
	rootDataLen := uint32(rootDirRecord[10]) | uint32(rootDirRecord[11])<<8 |
		uint32(rootDirRecord[12])<<16 | uint32(rootDirRecord[13])<<24
	// Read the root directory
	// Limit to first 16KB to avoid reading huge directories
	if rootDataLen > 16*1024 {
		rootDataLen = 16 * 1024
	}
	rootDir := make([]byte, rootDataLen)
	if _, err := f.ReadAt(rootDir, int64(rootExtent)*sectorSize); err != nil {
		return 0, err
	}
	// Parse directory entries looking for VIDEO_TS or BDMV
	hasBDMV := false
	hasVideoTS := false
	offset := 0
	for offset < len(rootDir) {
		// Each directory record begins with its own length byte.
		recLen := int(rootDir[offset])
		if recLen == 0 {
			// Directory records never span sectors: a zero length means
			// the rest of this sector is padding.
			// Move to next sector boundary
			nextSector := ((offset / sectorSize) + 1) * sectorSize
			if nextSector >= len(rootDir) {
				break
			}
			offset = nextSector
			continue
		}
		if offset+recLen > len(rootDir) {
			break
		}
		// Name length is at offset 32
		if offset+33 > len(rootDir) {
			break
		}
		nameLen := int(rootDir[offset+32])
		if offset+33+nameLen > len(rootDir) {
			break
		}
		// Extract and check the filename
		name := strings.ToUpper(string(rootDir[offset+33 : offset+33+nameLen]))
		// Strip version number (;1) if present
		if idx := strings.Index(name, ";"); idx >= 0 {
			name = name[:idx]
		}
		// Strip trailing dot if present
		name = strings.TrimSuffix(name, ".")
		if name == "BDMV" {
			hasBDMV = true
		}
		if name == "VIDEO_TS" {
			hasVideoTS = true
		}
		offset += recLen
	}
	// Blu-ray takes precedence if both are present
	if hasBDMV {
		return TypeBluray, nil
	}
	if hasVideoTS {
		return TypeDVD, nil
	}
	// Default to DVD for unrecognized ISOs
	return TypeDVD, nil
}
// File represents a source file within the source directory.
type File struct {
	RelativePath string // Path relative to source directory
	Size         int64  // File size in bytes
	Checksum     uint64 // xxhash of file for integrity
}

// Location represents a position within a source file where a hash was found.
// FileIndex is a uint16, so an index can reference at most 65536 files.
type Location struct {
	FileIndex        uint16 // Index into Files array
	Offset           int64  // Offset within that file (or ES offset for MPEG-PS)
	IsVideo          bool   // For ES-based indexes: true for video ES, false for audio ES
	AudioSubStreamID byte   // For audio in MPEG-PS: sub-stream ID (0x80-0x87 = AC3, etc.)
}
// ESRangeConverter provides an interface for converting ES offsets to raw file offsets.
// This is used during dedup file creation to convert ES-based entries to raw-offset entries.
type ESRangeConverter interface {
	// RawRangesForESRegion returns the raw file ranges that contain the given ES region.
	// Each returned range represents a contiguous chunk of raw file data.
	// The sum of all returned range sizes equals the requested ES region size.
	// For video streams only - audio should use RawRangesForAudioSubStream.
	RawRangesForESRegion(esOffset int64, size int, isVideo bool) ([]RawRange, error)
	// RawRangesForAudioSubStream returns the raw file ranges for audio data from a specific sub-stream.
	RawRangesForAudioSubStream(subStreamID byte, esOffset int64, size int) ([]RawRange, error)
}

// ESReader provides an interface for reading elementary stream data from container files.
type ESReader interface {
	// ReadESData reads size bytes of ES data starting at esOffset.
	// The data is continuous ES data, with container headers stripped.
	// For video, this works as expected. For audio, use ReadAudioSubStreamData instead.
	ReadESData(esOffset int64, size int, isVideo bool) ([]byte, error)
	// ESOffsetToFileOffset converts an ES offset to a file offset and remaining bytes in that segment.
	ESOffsetToFileOffset(esOffset int64, isVideo bool) (fileOffset int64, remaining int)
	// TotalESSize returns the total size of the elementary stream.
	// For video, returns filtered video ES size. For audio, returns 0 - use AudioSubStreamESSize.
	TotalESSize(isVideo bool) int64
	// AudioSubStreams returns the list of audio sub-stream IDs in order of appearance.
	AudioSubStreams() []byte
	// AudioSubStreamESSize returns the ES size for a specific audio sub-stream.
	AudioSubStreamESSize(subStreamID byte) int64
	// ReadAudioSubStreamData reads audio data from a specific sub-stream.
	ReadAudioSubStreamData(subStreamID byte, esOffset int64, size int) ([]byte, error)
}

// PESRangeProvider provides access to PES payload ranges for building range maps.
// Both MPEGPSParser and MPEGTSParser implement this.
type PESRangeProvider interface {
	// FilteredVideoRanges returns the PES payload ranges for the video stream.
	FilteredVideoRanges() []PESPayloadRange
	// FilteredAudioRanges returns the PES payload ranges for one audio sub-stream.
	FilteredAudioRanges(subStreamID byte) []PESPayloadRange
	// AudioSubStreams returns the audio sub-stream IDs in order of appearance.
	AudioSubStreams() []byte
}

// FileOffsetAdjuster provides a function to convert parser-relative FileOffset
// values to source-file-relative offsets for range map storage.
// Implemented by isoM2TSAdapter where the parser operates on a sub-region
// of the ISO and FileOffset values need to be adjusted to ISO-relative.
type FileOffsetAdjuster interface {
	FileOffsetConverter() func(int64) int64
}

// RawReader provides an interface for reading raw file data.
type RawReader interface {
	// ReadAt reads len(buf) bytes starting at offset, following the
	// io.ReaderAt contract.
	ReadAt(buf []byte, offset int64) (int, error)
	// Slice returns a zero-copy slice of the underlying data.
	// Returns nil if offset is out of range.
	Slice(offset int64, size int) []byte
	// Len reports the total size of the underlying data.
	Len() int
	// Close releases the underlying resources.
	Close() error
}
// Index holds the hash-to-location mapping for fast lookup of byte sequences.
type Index struct {
	// HashToLocations maps from xxhash to list of locations where that hash was found
	HashToLocations map[uint64][]Location
	// SourceDir is the path to the source directory
	SourceDir string
	// SourceType indicates whether this is DVD or Blu-ray
	SourceType Type
	// Files lists all media files in the source
	Files []File
	// WindowSize is the number of bytes used for hashing
	WindowSize int
	// ESReaders provides ES-aware reading for each file (nil for raw files)
	// For MPEG-PS files, this allows reading continuous ES data.
	ESReaders []ESReader
	// RawReaders provides raw file reading for each file.
	// Used when raw file indexing is enabled.
	RawReaders []RawReader
	// MmapFiles holds the mmap file handles for proper cleanup.
	// These back the ESReaders for MPEG-PS files.
	MmapFiles []*mmap.File
	// UsesESOffsets indicates whether Location.Offset values are ES offsets
	// rather than raw file offsets. True for DVD (MPEG-PS) sources.
	UsesESOffsets bool
	// sortOnce ensures SortLocationsByOffset runs only once, making it
	// safe to call from multiple places before matching begins.
	sortOnce sync.Once
}
// NewIndex creates a new empty Index for the given source directory.
// Reader slices and file lists start empty and are populated by the caller.
func NewIndex(sourceDir string, sourceType Type, windowSize int) *Index {
	idx := &Index{
		HashToLocations: map[uint64][]Location{},
		SourceDir:       sourceDir,
		SourceType:      sourceType,
		WindowSize:      windowSize,
	}
	return idx
}
// SortLocationsByOffset sorts every location list by (FileIndex, Offset).
// This one-time cost at match setup enables binary search for nearby
// locations during matching. Must be called before concurrent access;
// repeat calls are no-ops.
func (idx *Index) SortLocationsByOffset() {
	idx.sortOnce.Do(func() {
		for h, list := range idx.HashToLocations {
			if len(list) < 2 {
				continue // nothing to order
			}
			sort.Slice(list, func(a, b int) bool {
				la, lb := list[a], list[b]
				if la.FileIndex == lb.FileIndex {
					return la.Offset < lb.Offset
				}
				return la.FileIndex < lb.FileIndex
			})
			idx.HashToLocations[h] = list
		}
	})
}
// EnumerateMediaFiles returns the list of media files to index based on source type.
// Results are paths relative to dir, in glob (lexical) order.
//
// DVD sources are ISO images; Blu-ray sources are either an extracted
// BDMV/STREAM tree of .m2ts files or, failing that, ISO images.
func EnumerateMediaFiles(dir string, sourceType Type) ([]string, error) {
	var files []string
	var err error
	switch sourceType {
	case TypeDVD:
		files, err = globISOs(dir)
		if err != nil {
			return nil, err
		}
	case TypeBluray:
		// Prefer an extracted BDMV tree.
		files, err = filepath.Glob(filepath.Join(dir, "BDMV", "STREAM", "*.m2ts"))
		if err != nil {
			return nil, err
		}
		// If no extracted M2TS files, look for Blu-ray ISOs.
		if len(files) == 0 {
			files, err = globISOs(dir)
			if err != nil {
				return nil, err
			}
		}
	}
	// Convert to paths relative to the source directory.
	relFiles := make([]string, 0, len(files))
	for _, f := range files {
		rel, err := filepath.Rel(dir, f)
		if err != nil {
			return nil, err
		}
		relFiles = append(relFiles, rel)
	}
	return relFiles, nil
}

// globISOs returns *.iso files found directly in dir or one level down,
// top-level matches first (the two common ripped-disc layouts). It was
// previously duplicated inline in both EnumerateMediaFiles branches.
func globISOs(dir string) ([]string, error) {
	top, err := filepath.Glob(filepath.Join(dir, "*.iso"))
	if err != nil {
		return nil, err
	}
	nested, err := filepath.Glob(filepath.Join(dir, "*", "*.iso"))
	if err != nil {
		return nil, err
	}
	return append(top, nested...), nil
}
// GetFileInfo returns the size in bytes of the file at path, or the
// os.Stat error if the file cannot be examined.
func GetFileInfo(path string) (int64, error) {
	fi, statErr := os.Stat(path)
	if statErr != nil {
		return 0, statErr
	}
	return fi.Size(), nil
}
// ReadRawDataAt reads raw data from the source file at the given location.
// This is used for raw file indexing (non-ES mode).
// Note: This copies data. Prefer RawSlice for zero-copy access.
//
// A read that fills the entire buffer suppresses the error (e.g. io.EOF
// exactly at end of file); a short read returns the partial data plus error.
func (idx *Index) ReadRawDataAt(loc Location, size int) ([]byte, error) {
	fi := int(loc.FileIndex)
	if fi >= len(idx.RawReaders) || idx.RawReaders[fi] == nil {
		return nil, errors.New("no raw reader for file")
	}
	buf := make([]byte, size)
	n, err := idx.RawReaders[fi].ReadAt(buf, loc.Offset)
	if err == nil || n >= size {
		return buf[:n], nil
	}
	return buf[:n], err
}
// RawSlice returns a zero-copy slice of raw data at the given location.
// Returns nil if the location has no raw reader or is out of range.
func (idx *Index) RawSlice(loc Location, size int) []byte {
	fi := int(loc.FileIndex)
	if fi >= len(idx.RawReaders) {
		return nil
	}
	rr := idx.RawReaders[fi]
	if rr == nil {
		return nil
	}
	return rr.Slice(loc.Offset, size)
}
package source
// FindPGSSyncPoints returns byte offsets of PGS segment boundaries in data.
// PGS segments have a 3-byte header: [type (1 byte)] [size (2 bytes BE)].
// Each segment start is a sync point; scanning stops at the first byte
// that is not a valid segment type.
func FindPGSSyncPoints(data []byte) []int {
	var syncs []int
	for pos := 0; pos+3 <= len(data); {
		if !isValidPGSSegmentType(data[pos]) {
			break
		}
		syncs = append(syncs, pos)
		// Payload size is big-endian in the two bytes after the type.
		payload := int(data[pos+1])<<8 | int(data[pos+2])
		pos += 3 + payload
	}
	return syncs
}

// isValidPGSSegmentType reports whether t is one of the five defined PGS
// segment types: PDS (0x14), ODS (0x15), PCS (0x16), WDS (0x17), END (0x80).
func isValidPGSSegmentType(t byte) bool {
	return t == 0x14 || t == 0x15 || t == 0x16 || t == 0x17 || t == 0x80
}
package source
// ParseTrueHDAULength extracts the access unit length in bytes from the
// first 2 bytes of a TrueHD AU header. The low 12 bits hold the length
// in 16-bit words, so the byte count is twice that value. Headers
// shorter than 2 bytes yield 0.
func ParseTrueHDAULength(header []byte) int {
	if len(header) < 2 {
		return 0
	}
	words := (int(header[0]&0x0F) << 8) | int(header[1])
	return words * 2
}
package source
import (
"encoding/binary"
"fmt"
"os"
"strings"
)
// UDF descriptor tag IDs (ECMA-167 descriptor tag identifiers).
const (
	udfTagAVDP          = 2   // Anchor Volume Descriptor Pointer
	udfTagPartitionDesc = 5   // Partition Descriptor
	udfTagLogicalVolume = 6   // Logical Volume Descriptor
	udfTagFileSetDesc   = 256 // File Set Descriptor
	udfTagFileEntry     = 261 // File Entry
	udfTagFID           = 257 // File Identifier Descriptor
	udfTagExtFileEntry  = 266 // Extended File Entry
)

// udfDescriptorTag is the 16-byte tag at the start of every UDF descriptor.
// Only the fields needed for navigation are retained.
type udfDescriptorTag struct {
	TagID   uint16
	Version uint16
}

// udfExtent represents a physical extent (offset + length) on disk.
type udfExtent struct {
	Length   uint32
	Location uint32
}

// udfLongAD is a "long allocation descriptor" (16 bytes) used to reference
// data across partitions.
type udfLongAD struct {
	Length   uint32
	Location uint32 // logical block number within partition
	PartRef  uint16 // partition reference number
}

// udfShortAD is a "short allocation descriptor" (8 bytes). It carries no
// partition reference; the partition is inherited from context.
type udfShortAD struct {
	Length   uint32
	Position uint32 // logical block number
}

// udfPartitionDesc holds fields from a UDF Partition Descriptor (tag 5).
type udfPartitionDesc struct {
	PartitionNumber  uint16
	StartingLocation uint32 // physical sector number
}

// udfLogicalVolumeDesc holds fields from a UDF Logical Volume Descriptor (tag 6).
type udfLogicalVolumeDesc struct {
	BlockSize     uint32
	FSDLocation   udfLongAD // File Set Descriptor location
	PartitionMaps []udfPartitionMap
}

// udfPartitionMap describes a partition map entry from the Logical Volume Descriptor.
type udfPartitionMap struct {
	Type         byte // 1 = physical, 2 = metadata/virtual/sparable
	PartitionNum uint16
	IsMetadata   bool
	MetaFileLoc  uint32 // for metadata partitions: file location
}

// udfFileEntry holds parsed fields from a File Entry (tag 261) or
// Extended File Entry (tag 266).
type udfFileEntry struct {
	ICBTag     byte   // file type (4=directory, 5=file)
	InfoLength uint64 // information length (file size in bytes)
	AllocDescs []byte // raw allocation descriptors
	AllocType  byte   // 0=short_ad, 1=long_ad, 3=immediate/inline
	PartRef    uint16 // partition reference where this FE resides
}

// udfFID represents a File Identifier Descriptor (tag 257): one named
// entry in a directory, pointing at the entry's ICB.
type udfFID struct {
	Name        string
	IsDir       bool
	IsParent    bool
	ICBLocation udfLongAD
}
// isUDFImage reports whether the file carries a UDF Volume Recognition
// Sequence — a BEA01 marker plus an NSR02/NSR03 marker — somewhere in
// sectors 16 through 31.
func isUDFImage(f *os.File) bool {
	sector := make([]byte, isoSectorSize)
	var haveBEA, haveNSR bool
	for s := int64(16); s < 32; s++ {
		n, err := f.ReadAt(sector, s*isoSectorSize)
		if err != nil || n < 6 {
			continue
		}
		// The 5-byte structure identifier starts at byte 1 of the descriptor.
		switch string(sector[1:6]) {
		case "BEA01":
			haveBEA = true
		case "NSR02", "NSR03":
			haveNSR = true
		case "TEA01":
			// Terminator: nothing more to find in the sequence.
			return haveBEA && haveNSR
		}
		if haveBEA && haveNSR {
			return true
		}
	}
	return haveBEA && haveNSR
}
// detectUDFISOType inspects the UDF root directory to classify the ISO
// as Blu-ray (BDMV/) or DVD (VIDEO_TS/). Unparseable volumes — and
// volumes with neither directory — default to DVD, with Blu-ray taking
// precedence when both are present.
func detectUDFISOType(f *os.File) (Type, error) {
	entries, err := readUDFRootDir(f)
	if err != nil {
		// Can't walk the UDF structures: keep the legacy DVD default.
		return TypeDVD, nil
	}
	var bdmv bool
	for _, entry := range entries {
		if strings.ToUpper(entry.Name) == "BDMV" {
			bdmv = true
			break
		}
	}
	if bdmv {
		return TypeBluray, nil
	}
	// VIDEO_TS and "neither" both resolve to DVD.
	return TypeDVD, nil
}
// findBlurayM2TSInUDF walks the UDF filesystem to locate M2TS files
// under BDMV/STREAM/, returning isoFileExtent entries compatible with
// the ISO9660 code path. Entries that cannot be read or resolved are
// skipped; an error is returned only when no M2TS files are found at all.
func findBlurayM2TSInUDF(f *os.File) ([]isoFileExtent, error) {
	ctx, err := newUDFContext(f)
	if err != nil {
		return nil, err
	}
	// Walk root -> BDMV -> STREAM.
	rootDir, err := ctx.readDirectoryFromFE(ctx.rootFE)
	if err != nil {
		return nil, fmt.Errorf("read UDF root directory: %w", err)
	}
	bdmvFE, err := ctx.lookupDir(rootDir, "BDMV")
	if err != nil {
		return nil, fmt.Errorf("find BDMV: %w", err)
	}
	bdmvDir, err := ctx.readDirectoryFromFE(bdmvFE)
	if err != nil {
		return nil, fmt.Errorf("read BDMV directory: %w", err)
	}
	streamFE, err := ctx.lookupDir(bdmvDir, "STREAM")
	if err != nil {
		return nil, fmt.Errorf("find STREAM: %w", err)
	}
	streamDir, err := ctx.readDirectoryFromFE(streamFE)
	if err != nil {
		return nil, fmt.Errorf("read STREAM directory: %w", err)
	}
	// Pick out .M2TS entries and resolve their physical extents.
	var results []isoFileExtent
	for _, entry := range streamDir {
		if entry.IsDir || entry.IsParent {
			continue
		}
		upperName := strings.ToUpper(entry.Name)
		if !strings.HasSuffix(upperName, ".M2TS") {
			continue
		}
		fe, err := ctx.readFileEntryAt(entry.ICBLocation)
		if err != nil {
			continue // unreadable entry: skip rather than fail the disc
		}
		extents, err := ctx.resolveAllExtents(fe)
		if err != nil || len(extents) == 0 {
			continue
		}
		m2ts := isoFileExtent{
			Name:   upperName,
			Offset: extents[0].ISOOffset,
			Size:   int64(fe.InfoLength),
			IsDir:  false,
		}
		// Contiguous files are fully described by Offset+Size; only
		// fragmented files need the full extent list.
		if !extentsContiguous(extents) {
			m2ts.Extents = extents
		}
		results = append(results, m2ts)
	}
	if len(results) == 0 {
		return nil, fmt.Errorf("no M2TS files found in UDF BDMV/STREAM/")
	}
	return results, nil
}
// udfContext holds the parsed UDF volume structures needed for navigation.
type udfContext struct {
	f          *os.File
	blockSize  uint32             // logical block size from the LVD
	partStart  uint32             // physical sector of partition start
	partitions []udfPartitionDesc // all partition descriptors from the VDS
	partMaps   []udfPartitionMap  // partition maps from the LVD, indexed by partition reference
	metaData   []byte             // loaded metadata partition file (nil if Type 1 only)
	rootFE     *udfFileEntry      // root directory File Entry
}
// newUDFContext reads and parses the UDF volume structures needed to
// navigate the filesystem: AVDP -> VDS (partition + logical volume
// descriptors) -> optional metadata partition -> FSD -> root File Entry.
func newUDFContext(f *os.File) (*udfContext, error) {
	// The Anchor Volume Descriptor Pointer locates the main VDS.
	vdsExtent, err := readAVDP(f)
	if err != nil {
		return nil, fmt.Errorf("read AVDP: %w", err)
	}
	partDescs, lvd, err := readVDS(f, vdsExtent)
	if err != nil {
		return nil, fmt.Errorf("read VDS: %w", err)
	}
	if len(partDescs) == 0 {
		return nil, fmt.Errorf("no partition descriptors found in VDS")
	}
	// Resolve the physical start of the partition referenced by the
	// first partition map; fall back to the first descriptor.
	start := partDescs[0].StartingLocation
	if len(lvd.PartitionMaps) > 0 {
		want := lvd.PartitionMaps[0].PartitionNum
		for _, pd := range partDescs {
			if pd.PartitionNumber == want {
				start = pd.StartingLocation
				break
			}
		}
	}
	ctx := &udfContext{
		f:          f,
		blockSize:  lvd.BlockSize,
		partStart:  start,
		partitions: partDescs,
		partMaps:   lvd.PartitionMaps,
	}
	// Load the metadata partition up front when one is declared; its
	// contents back all metadata-partition block reads later.
	for _, pm := range lvd.PartitionMaps {
		if !pm.IsMetadata {
			continue
		}
		md, err := ctx.readMetadataFile(pm.MetaFileLoc)
		if err != nil {
			return nil, fmt.Errorf("read metadata partition: %w", err)
		}
		ctx.metaData = md
		break
	}
	// The File Set Descriptor leads to the root directory ICB.
	root, err := ctx.readFSDAndRoot(lvd.FSDLocation)
	if err != nil {
		return nil, fmt.Errorf("read FSD/root: %w", err)
	}
	ctx.rootFE = root
	return ctx, nil
}
// readAVDP reads the Anchor Volume Descriptor Pointer at sector 256 and
// returns the extent of the Main Volume Descriptor Sequence.
func readAVDP(f *os.File) (udfExtent, error) {
	sector := make([]byte, isoSectorSize)
	if _, err := f.ReadAt(sector, 256*isoSectorSize); err != nil {
		return udfExtent{}, fmt.Errorf("read sector 256: %w", err)
	}
	if tag := parseDescriptorTag(sector); tag.TagID != udfTagAVDP {
		return udfExtent{}, fmt.Errorf("sector 256: expected AVDP (tag 2), got tag %d", tag.TagID)
	}
	// The Main VDS extent record (length then location) sits at byte 16.
	ext := udfExtent{
		Length:   binary.LittleEndian.Uint32(sector[16:20]),
		Location: binary.LittleEndian.Uint32(sector[20:24]),
	}
	return ext, nil
}
// readVDS reads the Volume Descriptor Sequence and extracts partition
// descriptors and the logical volume descriptor.
// The scan stops at a Terminating Descriptor (tag 8), at the first read
// error, or after at most 64 sectors (a sanity cap for corrupt lengths).
func readVDS(f *os.File, extent udfExtent) ([]udfPartitionDesc, *udfLogicalVolumeDesc, error) {
	var partDescs []udfPartitionDesc
	var lvd *udfLogicalVolumeDesc
	sectors := int(extent.Length) / isoSectorSize
	if sectors > 64 {
		sectors = 64
	}
	buf := make([]byte, isoSectorSize)
	for i := 0; i < sectors; i++ {
		offset := int64(extent.Location+uint32(i)) * isoSectorSize
		if _, err := f.ReadAt(buf, offset); err != nil {
			break
		}
		tag := parseDescriptorTag(buf)
		switch tag.TagID {
		case udfTagPartitionDesc:
			// Partition number at offset 22, starting sector at offset 188.
			pd := udfPartitionDesc{
				PartitionNumber:  binary.LittleEndian.Uint16(buf[22:24]),
				StartingLocation: binary.LittleEndian.Uint32(buf[188:192]),
			}
			partDescs = append(partDescs, pd)
		case udfTagLogicalVolume:
			// Logical block size at offset 212.
			blockSize := binary.LittleEndian.Uint32(buf[212:216])
			// FSD location at offset 248 (16-byte long_ad)
			fsdLoc := parseLongAD(buf[248:264])
			// Partition maps at offset 440
			mapTableLen := binary.LittleEndian.Uint32(buf[264:268])
			numMaps := binary.LittleEndian.Uint32(buf[268:272])
			mapData := buf[440:]
			if int(mapTableLen) < len(mapData) {
				mapData = mapData[:mapTableLen]
			}
			partMaps := parsePartitionMaps(mapData, int(numMaps))
			lvd = &udfLogicalVolumeDesc{
				BlockSize:     blockSize,
				FSDLocation:   fsdLoc,
				PartitionMaps: partMaps,
			}
		case 8: // Terminating Descriptor
			// handled below
		}
		if tag.TagID == 8 {
			break
		}
	}
	if lvd == nil {
		return nil, nil, fmt.Errorf("no Logical Volume Descriptor found")
	}
	return partDescs, lvd, nil
}
// parsePartitionMaps parses the partition map table from the LVD.
// data is the raw map table (already truncated to the declared table
// length) and count is the declared number of maps. Parsing stops early
// on a zero-length or truncated entry.
func parsePartitionMaps(data []byte, count int) []udfPartitionMap {
	var maps []udfPartitionMap
	offset := 0
	for i := 0; i < count && offset < len(data); i++ {
		// Each map entry starts with [type (1 byte)] [length (1 byte)].
		if offset+2 > len(data) {
			break
		}
		mapType := data[offset]
		mapLen := int(data[offset+1])
		if mapLen == 0 || offset+mapLen > len(data) {
			break
		}
		pm := udfPartitionMap{Type: mapType}
		switch mapType {
		case 1:
			// Type 1: Physical partition (6 bytes)
			if mapLen >= 6 {
				pm.PartitionNum = binary.LittleEndian.Uint16(data[offset+4 : offset+6])
			}
		case 2:
			// Type 2: Could be metadata, virtual, or sparable (64 bytes)
			if mapLen >= 64 {
				pm.PartitionNum = binary.LittleEndian.Uint16(data[offset+38 : offset+40])
				// Check for metadata partition identifier at offset 4
				ident := string(data[offset+4 : offset+36])
				if strings.Contains(ident, "*UDF Metadata Partition") {
					pm.IsMetadata = true
					pm.MetaFileLoc = binary.LittleEndian.Uint32(data[offset+40 : offset+44])
				}
			}
		}
		maps = append(maps, pm)
		offset += mapLen
	}
	return maps
}
// readMetadataFile loads the metadata virtual file from the partition.
// The metadata file's File Entry sits at partStart + metaFileLoc; its
// allocation descriptors point to the actual metadata contents.
func (ctx *udfContext) readMetadataFile(metaFileLoc uint32) ([]byte, error) {
	sector := ctx.partStart + metaFileLoc
	blk := make([]byte, ctx.blockSize)
	if _, err := ctx.f.ReadAt(blk, int64(sector)*int64(ctx.blockSize)); err != nil {
		return nil, fmt.Errorf("read metadata file entry at sector %d: %w", sector, err)
	}
	fe, err := parseFileEntry(blk)
	if err != nil {
		return nil, fmt.Errorf("parse metadata file entry: %w", err)
	}
	// The metadata file's FE lives on the physical partition; point its
	// partition reference at the first Type 1 map so short_ad descriptors
	// resolve against the right partition.
	for i, pm := range ctx.partMaps {
		if pm.Type == 1 {
			fe.PartRef = uint16(i)
			break
		}
	}
	return ctx.readFileData(fe)
}
// readFSDAndRoot reads the File Set Descriptor and follows its root
// directory ICB to the root directory File Entry.
func (ctx *udfContext) readFSDAndRoot(fsdLoc udfLongAD) (*udfFileEntry, error) {
	data, err := ctx.readBlock(fsdLoc.Location, fsdLoc.PartRef)
	if err != nil {
		return nil, fmt.Errorf("read FSD block: %w", err)
	}
	if tag := parseDescriptorTag(data); tag.TagID != udfTagFileSetDesc {
		return nil, fmt.Errorf("expected FSD (tag 256), got tag %d", tag.TagID)
	}
	// The root directory ICB is a 16-byte long_ad at offset 400.
	if len(data) < 416 {
		return nil, fmt.Errorf("FSD too short")
	}
	return ctx.readFileEntryAt(parseLongAD(data[400:416]))
}
// readFileEntryAt reads and parses a File Entry at the given long_ad
// location, stamping it with the partition it was read from.
func (ctx *udfContext) readFileEntryAt(loc udfLongAD) (*udfFileEntry, error) {
	blk, err := ctx.readBlock(loc.Location, loc.PartRef)
	if err != nil {
		return nil, fmt.Errorf("read file entry block %d (part %d): %w", loc.Location, loc.PartRef, err)
	}
	fe, err := parseFileEntry(blk)
	if err != nil {
		return nil, err
	}
	fe.PartRef = loc.PartRef
	return fe, nil
}
// readDirectoryFromFE loads the directory contents referenced by fe and
// parses them into File Identifier Descriptors.
func (ctx *udfContext) readDirectoryFromFE(fe *udfFileEntry) ([]udfFID, error) {
	raw, err := ctx.readFileData(fe)
	if err != nil {
		return nil, err
	}
	return parseUDFDirectory(raw), nil
}
// lookupDir finds the entry with the given name among fids
// (case-insensitive, parent links excluded) and reads its File Entry.
func (ctx *udfContext) lookupDir(fids []udfFID, name string) (*udfFileEntry, error) {
	target := strings.ToUpper(name)
	for _, fid := range fids {
		if fid.IsParent || strings.ToUpper(fid.Name) != target {
			continue
		}
		return ctx.readFileEntryAt(fid.ICBLocation)
	}
	return nil, fmt.Errorf("%q not found in directory", name)
}
// maxAllocExtentChainDepth limits the number of type-3 allocation extent
// continuation hops to prevent infinite loops on corrupt/cyclic images.
// In practice even a badly fragmented 50 GB Blu-ray needs only 2-3 hops;
// 10000 is extremely conservative.
const maxAllocExtentChainDepth = 10000

// resolveAllExtents collects all physical extents for a file entry.
// For long_ad, each AD has an explicit partition reference.
// For short_ad, the partition is inherited from the FE.
// Handles allocation extent chaining (type 3 descriptors) for files
// whose allocation descriptors span multiple blocks.
//
// In each 32-bit AD length field, the top 2 bits encode the extent type
// (0=recorded, 1=allocated-not-recorded, 2=unallocated, 3=continuation)
// and the low 30 bits encode the extent length in bytes.
func (ctx *udfContext) resolveAllExtents(fe *udfFileEntry) ([]isoPhysicalRange, error) {
	allocDescs := fe.AllocDescs
	switch fe.AllocType & 0x07 {
	case 0: // short_ad
		if int(fe.PartRef) < len(ctx.partMaps) && ctx.partMaps[fe.PartRef].IsMetadata {
			return nil, fmt.Errorf("short_ad on metadata partition not supported for file extents")
		}
		var extents []isoPhysicalRange
		remaining := int64(fe.InfoLength)
		chainDepth := 0
		// visited guards against cyclic continuation chains; keys are
		// (partition reference, block) pairs.
		visited := map[[2]uint32]bool{}
		for remaining > 0 {
			followed := false
			for off := 0; off+8 <= len(allocDescs) && remaining > 0; off += 8 {
				ad := parseShortAD(allocDescs[off : off+8])
				extType := (ad.Length >> 30) & 0x03
				extLen := int64(ad.Length & 0x3FFFFFFF)
				if extLen == 0 {
					break // end-of-descriptor-list marker
				}
				if extType == 3 {
					// Type 3: continuation — restart the inner scan on the
					// next block of allocation descriptors.
					chainDepth++
					if chainDepth > maxAllocExtentChainDepth {
						return nil, fmt.Errorf("short_ad alloc extent chain depth exceeded %d", maxAllocExtentChainDepth)
					}
					key := [2]uint32{uint32(fe.PartRef), ad.Position}
					if visited[key] {
						return nil, fmt.Errorf("cycle in short_ad alloc extent chain at block %d part %d", ad.Position, fe.PartRef)
					}
					visited[key] = true
					nextDescs, err := ctx.readAllocExtentBlock(ad.Position, fe.PartRef)
					if err != nil {
						return nil, fmt.Errorf("follow short_ad alloc extent chain: %w", err)
					}
					allocDescs = nextDescs
					followed = true
					break
				}
				if extLen > remaining {
					extLen = remaining
				}
				if extType == 0 {
					// Type 0: recorded and allocated — actual data extent
					extents = append(extents, isoPhysicalRange{
						ISOOffset: ctx.resolveBlockPhysical(ad.Position),
						Length:    extLen,
					})
				}
				// Type 1 (allocated, not recorded) and type 2 (not allocated)
				// are sparse holes with no data on disc — skip without appending.
				remaining -= extLen
			}
			if !followed {
				break
			}
		}
		if remaining > 0 {
			return nil, fmt.Errorf("short_ad allocation descriptors truncated: %d bytes remaining", remaining)
		}
		return extents, nil
	case 1: // long_ad
		var extents []isoPhysicalRange
		remaining := int64(fe.InfoLength)
		chainDepth := 0
		visited := map[[2]uint32]bool{}
		for remaining > 0 {
			followed := false
			for off := 0; off+16 <= len(allocDescs) && remaining > 0; off += 16 {
				ad := parseLongAD(allocDescs[off : off+16])
				extType := (ad.Length >> 30) & 0x03
				extLen := int64(ad.Length & 0x3FFFFFFF)
				if extLen == 0 {
					break // end-of-descriptor-list marker
				}
				if extType == 3 {
					// Type 3: continuation — follow to the next AD block.
					chainDepth++
					if chainDepth > maxAllocExtentChainDepth {
						return nil, fmt.Errorf("long_ad alloc extent chain depth exceeded %d", maxAllocExtentChainDepth)
					}
					key := [2]uint32{uint32(ad.PartRef), ad.Location}
					if visited[key] {
						return nil, fmt.Errorf("cycle in long_ad alloc extent chain at block %d part %d", ad.Location, ad.PartRef)
					}
					visited[key] = true
					nextDescs, err := ctx.readAllocExtentBlock(ad.Location, ad.PartRef)
					if err != nil {
						return nil, fmt.Errorf("follow long_ad alloc extent chain: %w", err)
					}
					allocDescs = nextDescs
					followed = true
					break
				}
				if extLen > remaining {
					extLen = remaining
				}
				if extType == 0 {
					// Type 0: recorded and allocated — actual data extent
					if int(ad.PartRef) < len(ctx.partMaps) && ctx.partMaps[ad.PartRef].IsMetadata {
						return nil, fmt.Errorf("long_ad data extent on metadata partition")
					}
					extents = append(extents, isoPhysicalRange{
						ISOOffset: ctx.resolveBlockPhysical(ad.Location),
						Length:    extLen,
					})
				}
				// Type 1 (allocated, not recorded) and type 2 (not allocated)
				// are sparse holes with no data on disc — skip without appending.
				remaining -= extLen
			}
			if !followed {
				break
			}
		}
		if remaining > 0 {
			return nil, fmt.Errorf("long_ad allocation descriptors truncated: %d bytes remaining", remaining)
		}
		return extents, nil
	default:
		return nil, fmt.Errorf("unsupported alloc type %d for extent resolution", fe.AllocType&0x07)
	}
}
// readAllocExtentBlock reads a block of continuation allocation
// descriptors (the target of a type 3 extent). The block begins with an
// Allocation Extent Descriptor (tag 258) header:
//
//	bytes 0-15  descriptor tag
//	bytes 16-19 previous allocation extent location (uint32)
//	bytes 20-23 length of the allocation descriptors (uint32)
//	bytes 24+   allocation descriptor data
func (ctx *udfContext) readAllocExtentBlock(blockNum uint32, partRef uint16) ([]byte, error) {
	blk, err := ctx.readBlock(blockNum, partRef)
	if err != nil {
		return nil, fmt.Errorf("read alloc extent block %d (part %d): %w", blockNum, partRef, err)
	}
	if len(blk) < 24 {
		return nil, fmt.Errorf("alloc extent block too short")
	}
	if tag := parseDescriptorTag(blk); tag.TagID != 258 {
		return nil, fmt.Errorf("expected Allocation Extent Descriptor (tag 258), got tag %d", tag.TagID)
	}
	descLen := binary.LittleEndian.Uint32(blk[20:24])
	avail := len(blk) - 24
	if descLen > uint32(avail) {
		return nil, fmt.Errorf("allocation descriptor length %d exceeds remaining block bytes %d", descLen, avail)
	}
	return blk[24 : 24+int(descLen)], nil
}
// extentsContiguous reports whether every extent begins exactly where the
// previous one ends, i.e. the extents form one unbroken physical run.
// Zero or one extents are trivially contiguous.
func extentsContiguous(extents []isoPhysicalRange) bool {
	if len(extents) < 2 {
		return true
	}
	end := extents[0].ISOOffset + extents[0].Length
	for _, e := range extents[1:] {
		if e.ISOOffset != end {
			return false
		}
		end = e.ISOOffset + e.Length
	}
	return true
}
// readBlock reads a single filesystem block identified by a logical block
// number within the given partition reference. Metadata-partition blocks are
// served as a copy from the preloaded metadata image; physical-partition
// blocks are read from the underlying file.
func (ctx *udfContext) readBlock(blockNum uint32, partRef uint16) ([]byte, error) {
	// Metadata partition: blockNum indexes into the loaded metadata image.
	if int(partRef) < len(ctx.partMaps) && ctx.partMaps[partRef].IsMetadata {
		if ctx.metaData == nil {
			return nil, fmt.Errorf("metadata partition referenced but not loaded")
		}
		byteOffset := int64(blockNum) * int64(ctx.blockSize)
		if byteOffset+int64(ctx.blockSize) > int64(len(ctx.metaData)) {
			return nil, fmt.Errorf("metadata block %d out of range", blockNum)
		}
		result := make([]byte, ctx.blockSize)
		copy(result, ctx.metaData[byteOffset:byteOffset+int64(ctx.blockSize)])
		return result, nil
	}
	// Physical partition: blockNum is relative to the partition start.
	// Widen both operands before adding so partStart+blockNum cannot wrap
	// around uint32 on very large images.
	physOffset := (int64(ctx.partStart) + int64(blockNum)) * int64(ctx.blockSize)
	buf := make([]byte, ctx.blockSize)
	if _, err := ctx.f.ReadAt(buf, physOffset); err != nil {
		return nil, err
	}
	return buf, nil
}
// resolveBlockPhysical converts a logical block number on the default (first
// physical) partition to an absolute byte offset in the ISO file.
func (ctx *udfContext) resolveBlockPhysical(blockNum uint32) int64 {
	// Widen both operands before adding so partStart+blockNum cannot wrap
	// around uint32 on very large images.
	return (int64(ctx.partStart) + int64(blockNum)) * int64(ctx.blockSize)
}
// readFileData reads the complete data of a file described by a File Entry,
// dispatching on the ICB allocation type (low 3 bits of the ICB flags).
func (ctx *udfContext) readFileData(fe *udfFileEntry) ([]byte, error) {
	if fe.InfoLength == 0 {
		return nil, nil
	}
	allocType := fe.AllocType & 0x07
	switch allocType {
	case 0: // short_ad
		return ctx.readFromShortADs(fe)
	case 1: // long_ad
		return ctx.readFromLongADs(fe)
	case 3: // inline/immediate: data is embedded in the alloc descs area
		if fe.InfoLength <= uint64(len(fe.AllocDescs)) {
			return fe.AllocDescs[:fe.InfoLength], nil
		}
		// Declared length exceeds what is present; return what we have.
		return fe.AllocDescs, nil
	default:
		return nil, fmt.Errorf("unsupported allocation type %d", allocType)
	}
}
// readFromShortADs reads file data described by short allocation descriptors.
// Short ADs don't carry an explicit partition reference — they inherit the
// partition of the File Entry that contains them, so a single
// metadata-vs-physical decision applies to every extent.
func (ctx *udfContext) readFromShortADs(fe *udfFileEntry) ([]byte, error) {
	// Determine if this FE's partition is the (loaded) metadata partition.
	isMeta := int(fe.PartRef) < len(ctx.partMaps) && ctx.partMaps[fe.PartRef].IsMetadata && ctx.metaData != nil
	result := make([]byte, 0, fe.InfoLength)
	remaining := int64(fe.InfoLength)
	for off := 0; off+8 <= len(fe.AllocDescs) && remaining > 0; off += 8 {
		ad := parseShortAD(fe.AllocDescs[off : off+8])
		extLen := int64(ad.Length & 0x3FFFFFFF) // mask off extent type bits
		if extLen == 0 {
			break // end-of-descriptor-list marker
		}
		toRead := min(extLen, remaining)
		if isMeta {
			// Resolve within the loaded metadata data.
			byteOffset := int64(ad.Position) * int64(ctx.blockSize)
			if byteOffset+toRead > int64(len(ctx.metaData)) {
				return nil, fmt.Errorf("metadata short_ad extent out of range (offset %d, len %d, metaLen %d)",
					byteOffset, toRead, len(ctx.metaData))
			}
			result = append(result, ctx.metaData[byteOffset:byteOffset+toRead]...)
		} else {
			// Widen both operands before adding so partStart+Position cannot
			// wrap around uint32 on very large images.
			physOffset := (int64(ctx.partStart) + int64(ad.Position)) * int64(ctx.blockSize)
			buf := make([]byte, toRead)
			if _, err := ctx.f.ReadAt(buf, physOffset); err != nil {
				return nil, fmt.Errorf("read short_ad extent at offset %d: %w", physOffset, err)
			}
			result = append(result, buf...)
		}
		remaining -= toRead
	}
	return result, nil
}
// readFromLongADs reads file data described by long allocation descriptors.
// Each long_ad carries its own partition reference, so metadata-vs-physical
// resolution is decided per extent.
func (ctx *udfContext) readFromLongADs(fe *udfFileEntry) ([]byte, error) {
	result := make([]byte, 0, fe.InfoLength)
	remaining := int64(fe.InfoLength)
	for off := 0; off+16 <= len(fe.AllocDescs) && remaining > 0; off += 16 {
		ad := parseLongAD(fe.AllocDescs[off : off+16])
		extLen := int64(ad.Length & 0x3FFFFFFF) // mask off extent type bits
		if extLen == 0 {
			break // end-of-descriptor-list marker
		}
		toRead := min(extLen, remaining)
		// Check if this extent references the (loaded) metadata partition.
		if int(ad.PartRef) < len(ctx.partMaps) && ctx.partMaps[ad.PartRef].IsMetadata && ctx.metaData != nil {
			byteOffset := int64(ad.Location) * int64(ctx.blockSize)
			if byteOffset+toRead > int64(len(ctx.metaData)) {
				return nil, fmt.Errorf("metadata extent out of range")
			}
			result = append(result, ctx.metaData[byteOffset:byteOffset+toRead]...)
		} else {
			// Widen both operands before adding so partStart+Location cannot
			// wrap around uint32 on very large images.
			physOffset := (int64(ctx.partStart) + int64(ad.Location)) * int64(ctx.blockSize)
			buf := make([]byte, toRead)
			if _, err := ctx.f.ReadAt(buf, physOffset); err != nil {
				return nil, fmt.Errorf("read long_ad extent at offset %d: %w", physOffset, err)
			}
			result = append(result, buf...)
		}
		remaining -= toRead
	}
	return result, nil
}
// readUDFRootDir is a convenience helper that opens a UDF context on f and
// returns the root directory's file identifier descriptors.
// Used by detectUDFISOType.
func readUDFRootDir(f *os.File) ([]udfFID, error) {
	udf, err := newUDFContext(f)
	if err != nil {
		return nil, err
	}
	return udf.readDirectoryFromFE(udf.rootFE)
}
// --- Low-level parsing helpers ---
// parseDescriptorTag parses the 16-byte UDF descriptor tag at the start of buf.
// A buffer shorter than 16 bytes yields the zero tag.
func parseDescriptorTag(buf []byte) udfDescriptorTag {
	var tag udfDescriptorTag
	if len(buf) >= 16 {
		tag.TagID = binary.LittleEndian.Uint16(buf[0:2])
		tag.Version = binary.LittleEndian.Uint16(buf[2:4])
	}
	return tag
}
// parseLongAD parses a 16-byte long allocation descriptor: extent length,
// logical block location, and partition reference number.
func parseLongAD(buf []byte) udfLongAD {
	var ad udfLongAD
	ad.Length = binary.LittleEndian.Uint32(buf[0:4])
	ad.Location = binary.LittleEndian.Uint32(buf[4:8])
	ad.PartRef = binary.LittleEndian.Uint16(buf[8:10])
	return ad
}
// parseShortAD parses an 8-byte short allocation descriptor: extent length
// followed by extent position.
func parseShortAD(buf []byte) udfShortAD {
	var ad udfShortAD
	ad.Length = binary.LittleEndian.Uint32(buf[0:4])
	ad.Position = binary.LittleEndian.Uint32(buf[4:8])
	return ad
}
// parseFileEntry parses a UDF File Entry (tag 261) or Extended File Entry (tag 266).
//
// Only the fields this package needs are extracted: the ICB file type, the
// information (file) length, the allocation type from the low bits of the ICB
// flags, and a copy of the raw allocation descriptor bytes. The two entry
// variants share field offsets up to the ICB tag but place L_EA/L_AD at
// different offsets, hence the two branches below.
func parseFileEntry(data []byte) (*udfFileEntry, error) {
	if len(data) < 16 {
		return nil, fmt.Errorf("data too short for file entry")
	}
	tag := parseDescriptorTag(data)
	if tag.TagID != udfTagFileEntry && tag.TagID != udfTagExtFileEntry {
		return nil, fmt.Errorf("expected File Entry (tag 261/266), got tag %d", tag.TagID)
	}
	// ICB Tag at offset 16 (20 bytes), file type at ICB tag offset 11 (= data offset 27)
	if len(data) < 28 {
		return nil, fmt.Errorf("data too short for ICB tag")
	}
	fileType := data[27]
	var infoLength uint64
	var allocDescsOffset int
	var allocDescsLength uint32
	var icbFlags uint16
	if tag.TagID == udfTagFileEntry {
		// File Entry (tag 261)
		// ECMA-167 14.9: L_EA at 168, L_AD at 172, alloc descs at 176+L_EA
		if len(data) < 176 {
			return nil, fmt.Errorf("file entry too short")
		}
		infoLength = binary.LittleEndian.Uint64(data[56:64])
		icbFlags = binary.LittleEndian.Uint16(data[34:36])
		eaLen := binary.LittleEndian.Uint32(data[168:172])
		allocDescsLength = binary.LittleEndian.Uint32(data[172:176])
		allocDescsOffset = 176 + int(eaLen)
	} else {
		// Extended File Entry (tag 266)
		// ECMA-167 14.17: L_EA at 208, L_AD at 212, alloc descs at 216+L_EA
		if len(data) < 216 {
			return nil, fmt.Errorf("extended file entry too short")
		}
		infoLength = binary.LittleEndian.Uint64(data[56:64])
		icbFlags = binary.LittleEndian.Uint16(data[34:36])
		eaLen := binary.LittleEndian.Uint32(data[208:212])
		allocDescsLength = binary.LittleEndian.Uint32(data[212:216])
		allocDescsOffset = 216 + int(eaLen)
	}
	// Guard against overflow or out-of-bounds from malformed eaLen
	if allocDescsOffset < 0 || allocDescsOffset > len(data) {
		return nil, fmt.Errorf("file entry alloc descs offset out of bounds: %d", allocDescsOffset)
	}
	// Copy the allocation descriptors out of data so the returned entry does
	// not alias the caller's buffer. If the declared length runs past the end
	// of data, allocDescs is deliberately left nil rather than failing hard.
	var allocDescs []byte
	if allocDescsOffset+int(allocDescsLength) <= len(data) {
		allocDescs = make([]byte, allocDescsLength)
		copy(allocDescs, data[allocDescsOffset:allocDescsOffset+int(allocDescsLength)])
	}
	return &udfFileEntry{
		ICBTag:     fileType,
		InfoLength: infoLength,
		AllocDescs: allocDescs,
		AllocType:  byte(icbFlags & 0x07), // low 3 bits select the allocation descriptor type
	}, nil
}
// parseUDFDirectory parses raw directory data into a list of FIDs.
// Parsing stops at the first descriptor that is not a FID or that would
// run past the end of the buffer.
func parseUDFDirectory(dirData []byte) []udfFID {
	var fids []udfFID
	for offset := 0; offset+38 <= len(dirData); {
		if parseDescriptorTag(dirData[offset:]).TagID != udfTagFID {
			break
		}
		// FID fixed part: characteristics at +18, name length at +19,
		// ICB long_ad at +20, implementation-use length at +36.
		chars := dirData[offset+18]
		nameLen := int(dirData[offset+19])
		icb := parseLongAD(dirData[offset+20 : offset+36])
		implLen := int(binary.LittleEndian.Uint16(dirData[offset+36 : offset+38]))
		nameStart := offset + 38 + implLen
		if nameStart+nameLen > len(dirData) {
			break
		}
		parent := chars&0x08 != 0
		var name string
		if nameLen > 0 && !parent {
			name = decodeUDFString(dirData[nameStart : nameStart+nameLen])
		}
		fids = append(fids, udfFID{
			Name:        name,
			IsDir:       chars&0x02 != 0,
			IsParent:    parent,
			ICBLocation: icb,
		})
		// Advance by the FID's total length (38 + implLen + nameLen),
		// padded up to a 4-byte boundary.
		offset += (38 + implLen + nameLen + 3) &^ 3
	}
	return fids
}
// decodeUDFString decodes a UDF d-string/d-characters identifier. The first
// byte is an OSTA compression ID: 8 means one byte per character (Latin-1 /
// ASCII subset), 16 means big-endian 16-bit (UCS-2) characters. Any other ID
// is decoded as raw bytes as a best effort.
func decodeUDFString(data []byte) string {
	if len(data) == 0 {
		return ""
	}
	id, chars := data[0], data[1:]
	if id != 16 {
		// Compression ID 8 and unknown IDs alike: treat the payload as
		// raw 8-bit characters.
		return string(chars)
	}
	// 16-bit big-endian characters, two bytes each; a trailing odd byte
	// is ignored.
	var b strings.Builder
	for i := 0; i+1 < len(chars); i += 2 {
		b.WriteRune(rune(chars[i])<<8 | rune(chars[i+1]))
	}
	return b.String()
}
package source
import (
"bytes"
"encoding/binary"
)
// FindVideoStartCodes finds all video start code positions (00 00 01 XX pattern) in the data.
// Returns the position of the first 00 in each start code.
// These are potential sync points where video frames or other structures begin.
// Scanning is driven by bytes.IndexByte on the 0x01 byte, which uses SIMD on x86.
func FindVideoStartCodes(data []byte) []int {
	if len(data) < 4 {
		return nil
	}
	// Estimate roughly one start code per 2KB of video data.
	found := make([]int, 0, len(data)/2048+1)
	// Locate candidate 0x01 bytes and verify the two zero bytes before them.
	// The scan starts at index 2 because a start code needs 00 00 ahead of the 01.
	for cur := 2; cur < len(data)-1; {
		rel := bytes.IndexByte(data[cur:], 0x01)
		if rel < 0 {
			break
		}
		at := cur + rel
		if at >= 2 && data[at-2] == 0x00 && data[at-1] == 0x00 {
			found = append(found, at-2)
		}
		cur = at + 1
	}
	return found
}
// FindVideoStartCodesInRange finds video start codes within a specific range.
// Returns the position of the first 00 in each start code, offset by startOffset.
// Uses bytes.IndexByte for fast (SIMD-assisted) scanning.
func FindVideoStartCodesInRange(data []byte, startOffset int) []int {
	if len(data) < 4 {
		return nil
	}
	found := make([]int, 0, len(data)/2048+1) // ~1 start code per 2KB
	for cur := 2; cur < len(data)-1; {
		rel := bytes.IndexByte(data[cur:], 0x01)
		if rel < 0 {
			break
		}
		at := cur + rel
		if at >= 2 && data[at-2] == 0x00 && data[at-1] == 0x00 {
			found = append(found, startOffset+at-2)
		}
		cur = at + 1
	}
	return found
}
// FindVideoNALStarts finds NAL unit start positions in Annex B formatted data.
// Returns positions of NAL header bytes (the byte AFTER 00 00 01).
// This is used for hashing: NAL header + NAL data are identical in both
// Annex B (source) and AVCC (MKV) formats, enabling cross-format matching.
func FindVideoNALStarts(data []byte) []int {
	if len(data) < 4 {
		return nil
	}
	found := make([]int, 0, len(data)/2048+1)
	for cur := 2; cur < len(data)-1; {
		rel := bytes.IndexByte(data[cur:], 0x01)
		if rel < 0 {
			break
		}
		at := cur + rel
		// The start code occupies at-2..at; the NAL header byte follows at
		// at+1 and must actually exist within the buffer.
		if at >= 2 && data[at-2] == 0x00 && data[at-1] == 0x00 && at+1 < len(data) {
			found = append(found, at+1)
		}
		cur = at + 1
	}
	return found
}
// FindVideoNALStartsInRange finds NAL unit start positions in a specific range.
// Returns positions offset by startOffset for use during chunked file processing.
func FindVideoNALStartsInRange(data []byte, startOffset int) []int {
	if len(data) < 4 {
		return nil
	}
	found := make([]int, 0, len(data)/2048+1)
	for cur := 2; cur < len(data)-1; {
		rel := bytes.IndexByte(data[cur:], 0x01)
		if rel < 0 {
			break
		}
		at := cur + rel
		// NAL header byte sits one past the 0x01 and must be in bounds.
		if at >= 2 && data[at-2] == 0x00 && data[at-1] == 0x00 && at+1 < len(data) {
			found = append(found, startOffset+at+1)
		}
		cur = at + 1
	}
	return found
}
// FindAVCCNALStarts finds NAL unit start positions in AVCC/HVCC formatted data.
// In AVCC format, each NAL unit is prefixed with a big-endian length field of
// nalLengthSize bytes (1-4; typically 4 for H.264 AVCC and H.265 HVCC).
// Returns positions of NAL header bytes (the byte after each length prefix).
func FindAVCCNALStarts(data []byte, nalLengthSize int) []int {
	if nalLengthSize < 1 || nalLengthSize > 4 {
		return nil
	}
	if len(data) < nalLengthSize+1 {
		return nil
	}
	offsets := make([]int, 0, len(data)/2048+1)
	for pos := 0; pos+nalLengthSize < len(data); {
		// Assemble the big-endian length prefix one byte at a time; this
		// handles all prefix sizes 1-4 uniformly.
		var nalLen uint32
		for _, b := range data[pos : pos+nalLengthSize] {
			nalLen = nalLen<<8 | uint32(b)
		}
		nalStart := pos + nalLengthSize
		if nalLen == 0 || nalStart >= len(data) {
			break
		}
		offsets = append(offsets, nalStart)
		next := nalStart + int(nalLen)
		if next <= pos {
			break // int overflow guard on 32-bit platforms
		}
		pos = next
	}
	return offsets
}
// Package testdata provides helpers for locating integration test data.
//
// Test data (Big Buck Bunny DVD ISO and MKV) is not stored in the repository.
// See README.md in this directory for setup instructions.
package testdata
import (
"os"
"os/exec"
"path/filepath"
"runtime"
"testing"
)
// Paths contains the resolved paths to test data files.
// When Available is false the path fields are not guaranteed to point at
// existing files; callers should check Available (or use SkipIfNotAvailable)
// before using them.
type Paths struct {
	Root      string // Base test data directory
	ISODir    string // Directory containing ISO file
	ISOFile   string // Path to the ISO file (PAL or NTSC variant)
	MKVDir    string // Directory containing MKV file(s)
	MKVFile   string // Path to the main MKV file (first glob match)
	Available bool   // True if all required files exist
}
// Names used to locate the generated test data.
const (
	// DefaultISOName is the expected ISO filename.
	DefaultISOName = "bbb-pal.iso"
	// DefaultMKVPattern is the glob pattern for finding MKV files.
	DefaultMKVPattern = "*.mkv"
)
// Find locates the test data directory and checks for required files.
// Candidate locations are tried in order:
//  1. $MKVDUP_TESTDATA environment variable
//  2. testdata/generated/ (relative to the testdata package, created by generate-test-data.sh)
//  3. ~/.cache/mkvdup/testdata/
//  4. /tmp/mkvdup-testdata/
//
// Returns Paths with Available=false if test data is not found.
func Find() Paths {
	var candidates []string
	if env := os.Getenv("MKVDUP_TESTDATA"); env != "" {
		candidates = append(candidates, env)
	}
	// testdata/generated/ is the preferred location for reproducible test data.
	if local := findLocalTestdataDir(); local != "" {
		candidates = append(candidates, local)
	}
	if home, err := os.UserHomeDir(); err == nil {
		candidates = append(candidates, filepath.Join(home, ".cache", "mkvdup", "testdata"))
	}
	candidates = append(candidates, "/tmp/mkvdup-testdata")

	var p Paths
	for _, root := range candidates {
		p.Root = root
		if checkPaths(&p) {
			return p
		}
	}
	// Nothing found — return a fully zeroed result (Available=false).
	return Paths{}
}
// checkPaths fills in the paths and returns true if all required files exist.
func checkPaths(p *Paths) bool {
	p.ISODir = filepath.Join(p.Root, "bigbuckbunny")
	p.MKVDir = filepath.Join(p.Root, "bigbuckbunny-mkv")
	// Accept either the PAL (default) or the NTSC ISO variant.
	isoFound := false
	for _, name := range []string{DefaultISOName, "bbb-ntsc.iso"} {
		p.ISOFile = filepath.Join(p.ISODir, name)
		if _, err := os.Stat(p.ISOFile); err == nil {
			isoFound = true
			break
		}
	}
	if !isoFound {
		p.Available = false
		return false
	}
	// Use the first MKV file matching the pattern.
	matches, err := filepath.Glob(filepath.Join(p.MKVDir, DefaultMKVPattern))
	if err != nil || len(matches) == 0 {
		p.Available = false
		return false
	}
	p.MKVFile = matches[0]
	p.Available = true
	return true
}
// SkipIfNotAvailable calls t.Skip if test data is not available.
// Use this at the start of integration tests.
func SkipIfNotAvailable(t interface{ Skip(...interface{}) }) Paths {
	paths := Find()
	if paths.Available {
		return paths
	}
	t.Skip("Test data not available. See testdata/README.md for setup instructions.")
	return paths
}
// CreateBlurayData creates a Blu-ray directory structure by remuxing the MKV
// file to M2TS format using ffmpeg (stream copy, no re-encoding). The
// directory is created under tmpDir with the layout BDMV/STREAM/00001.m2ts
// that DetectType recognises as TypeBluray.
//
// The test is skipped if ffmpeg is not available.
func (p Paths) CreateBlurayData(t testing.TB, tmpDir string) string {
	t.Helper()
	if _, err := exec.LookPath("ffmpeg"); err != nil {
		t.Skip("ffmpeg not available, skipping Blu-ray test")
	}
	root := filepath.Join(tmpDir, "bluray")
	stream := filepath.Join(root, "BDMV", "STREAM")
	if err := os.MkdirAll(stream, 0755); err != nil {
		t.Fatalf("CreateBlurayData: mkdir: %v", err)
	}
	target := filepath.Join(stream, "00001.m2ts")
	args := []string{
		"-loglevel", "error",
		"-i", p.MKVFile,
		"-c", "copy",
		"-f", "mpegts",
		"-y", // overwrite if exists
		target,
	}
	out, err := exec.Command("ffmpeg", args...).CombinedOutput()
	if err != nil {
		t.Fatalf("CreateBlurayData: ffmpeg remux failed: %v\n%s", err, out)
	}
	return root
}
// findLocalTestdataDir returns the path to the testdata/generated/ directory
// that sits next to this source file, or "" if the caller's file path cannot
// be determined via runtime.Caller.
func findLocalTestdataDir() string {
	if _, file, _, ok := runtime.Caller(0); ok {
		// The generated data lives alongside this file (testdata.go).
		return filepath.Join(filepath.Dir(file), "generated")
	}
	return ""
}