Jump to content

Connect Leeroopedia MCP: Equip your AI agents to search best practices, build plans, verify code, diagnose failures, and look up hyperparameter defaults.

Implementation:FlagOpen FlagEmbedding MLVU Topic Reasoning Data

From Leeroopedia


Knowledge Sources
Domains Video Understanding, Reasoning, Topic Classification
Last Updated 2026-02-09 00:00 GMT

Overview

Benchmark dataset for topic identification and reasoning about video content themes and subjects.

Description

The MLVU Topic Reasoning dataset contains 3434 questions that test models' ability to identify and reason about high-level topics, themes, and subjects in videos. Questions focus on understanding the overall context, genre, setting, and main subjects of videos rather than specific details. This requires holistic video comprehension and the ability to abstract from specific events to general themes.

Questions typically ask about:

  • Main background or setting of videos
  • Genre or film type classification
  • Primary subjects or themes
  • Overall context and environment
  • Color schemes and visual styles

This tests higher-level semantic understanding beyond simple object or action recognition.

Usage

Use this dataset for evaluating semantic video understanding, benchmarking genre classification capabilities, or training models on high-level video comprehension tasks.

Code Reference

Source Location

Data Structure

# Schema of a single dataset record: each key is shown with the Python type of its value.
{
    "video": str,              # Video filename
    "duration": int,           # Video duration in seconds
    "question": str,           # Question about topic/theme
    "candidates": List[str],   # Four candidate answers
    "answer": str,             # Correct answer
    "question_type": str       # Always "topic_reasoning"
}

Import

import json

# Load topic reasoning dataset.
# The file is first parsed as one JSON document (e.g. a top-level array of
# records). If that fails, it is read as JSON Lines: one record per non-blank
# line, so blank or trailing empty lines no longer abort the load.
with open("research/MLVU/data/7_topic_reasoning.json", "r", encoding="utf-8") as f:
    raw = f.read()

try:
    topic_data = json.loads(raw)
except json.JSONDecodeError:
    # Text mode normalizes newlines to "\n", so splitting on it mirrors
    # iterating the file line by line.
    topic_data = [json.loads(line) for line in raw.split("\n") if line.strip()]
else:
    # A file holding exactly one record still yields a list of records.
    if not isinstance(topic_data, list):
        topic_data = [topic_data]

I/O Contract

Inputs

Name Type Required Description
file_path str Yes Path to the topic reasoning dataset JSON file

Outputs

Field Type Description
video str Video filename
duration int Video duration in seconds
question str Question about video topic/theme
candidates List[str] Four possible answers
answer str Correct answer
question_type str Type identifier ("topic_reasoning")

Usage Examples

import json
from typing import List, Dict
from collections import Counter

# Load topic reasoning dataset
def load_topic_data(file_path: str) -> List[Dict]:
    """Load topic-reasoning records from a JSON or JSON Lines file.

    The file is first parsed as a single JSON document; a top-level array is
    returned as-is and any other single value is wrapped in a one-element
    list. If the text is not one valid JSON document, it is parsed as JSON
    Lines instead: one record per line, with blank lines skipped.

    Args:
        file_path: Path to the dataset file (read as UTF-8).

    Returns:
        The list of decoded records; an empty list for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON when the
            file is being read as JSON Lines.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        raw = f.read()

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Not a single JSON document (or empty): treat as JSON Lines.
        # Text mode normalizes newlines to "\n", so splitting on it mirrors
        # iterating the file line by line.
        return [json.loads(line) for line in raw.split("\n") if line.strip()]

    return parsed if isinstance(parsed, list) else [parsed]

# Read every record of the dataset file into a list
data = load_topic_data("research/MLVU/data/7_topic_reasoning.json")

# Example entry: print the fields of the first record
# (data[0] raises IndexError when no records were loaded)
example = data[0]
print(f"Video: {example['video']}")
print(f"Duration: {example['duration']}s")
print(f"Question: {example['question']}")
print(f"Candidates: {example['candidates']}")
print(f"Answer: {example['answer']}")

# Output:
# Video: AWA-6.mp4
# Duration: 450s
# Question: What is the main background of the video?
# Candidates: ['Grassland', 'Lake', 'Ocean', 'Desert']
# Answer: Grassland

# Categorize question types
def categorize_questions(data: List[Dict]) -> Dict[str, List[Dict]]:
    """Group records into keyword-based buckets according to their question text.

    Matching is case-insensitive and substring-based. Rules are tried in
    order and the first one that matches decides the bucket; records that
    match no rule land in ``"other"``.

    Args:
        data: Records that each carry a ``question`` string.

    Returns:
        A dict with the keys ``background``, ``film_type``, ``color``,
        ``theme`` and ``other``, each mapping to the list of matching records
        in input order.
    """
    # (bucket name, keywords) pairs, checked top to bottom.
    keyword_rules = (
        ("background", ("background",)),
        ("film_type", ("film", "type")),
        ("color", ("color",)),
        ("theme", ("theme", "main")),
    )

    buckets: Dict[str, List[Dict]] = {label: [] for label, _ in keyword_rules}
    buckets["other"] = []

    for record in data:
        text = record['question'].lower()
        label = next(
            (
                name
                for name, words in keyword_rules
                if any(word in text for word in words)
            ),
            "other",
        )
        buckets[label].append(record)

    return buckets

# Bucket every loaded record by question keyword, then report each bucket's size
categories = categorize_questions(data)
print("Question categories:")
for cat, items in categories.items():
    print(f"  {cat}: {len(items)}")

# Evaluate topic reasoning
def evaluate_topic_reasoning(model, data: List[Dict]) -> Dict[str, float]:
    """Score a model on topic-reasoning questions, overall and per question category.

    Args:
        model: Object exposing ``identify_topic(video_path, question, candidates)``.
            Its return value is compared with the record's ``answer`` using ``==``.
        data: Records with ``video``, ``question``, ``candidates`` and
            ``answer`` keys.

    Returns:
        A dict with ``"overall_acc"`` plus one ``"<category>_acc"`` entry for
        every category that occurs in ``data`` (``background``, ``film_type``,
        ``color`` or ``general``). For empty ``data`` the result is just
        ``{"overall_acc": 0.0}``.
    """
    total = len(data)
    if total == 0:
        # Nothing to score; returning early avoids dividing by zero below.
        return {"overall_acc": 0.0}

    correct = 0
    # category name -> {"correct": hits, "total": questions seen}
    category_results: Dict[str, Dict[str, int]] = {}

    for item in data:
        # Video files are looked up relative to a "videos/" directory.
        video_path = f"videos/{item['video']}"

        predicted = model.identify_topic(
            video_path,
            item['question'],
            item['candidates']
        )

        is_correct = (predicted == item['answer'])
        correct += is_correct

        category = _topic_question_category(item['question'])
        stats = category_results.setdefault(category, {"correct": 0, "total": 0})
        stats["total"] += 1
        stats["correct"] += is_correct

    # Every tracked category has total >= 1, so these divisions are safe.
    category_acc = {
        f"{cat}_acc": stats["correct"] / stats["total"]
        for cat, stats in category_results.items()
    }

    return {
        "overall_acc": correct / total,
        **category_acc
    }


def _topic_question_category(question: str) -> str:
    """Return the reporting category for a question; the first keyword match wins."""
    text = question.lower()
    if "background" in text:
        return "background"
    if "film" in text or "type" in text:
        return "film_type"
    if "color" in text:
        return "color"
    return "general"

# Analyze answer distributions
def analyze_answers(data: List[Dict]) -> Dict:
    """Summarize how the correct answers are distributed across the records.

    Args:
        data: Records that each carry ``question`` and ``answer`` entries.

    Returns:
        A dict with four entries:
        ``total_unique_answers`` (number of distinct answers),
        ``most_common_answers`` (up to ten ``(answer, count)`` pairs),
        ``film_types`` (answer counts for questions mentioning "film" or
        "type") and ``background_types`` (answer counts for questions
        mentioning "background"). Keyword checks are case-insensitive
        substring matches.
    """
    def tally(keep) -> Counter:
        # Count the answers of records whose lowercased question satisfies `keep`.
        return Counter(
            record['answer']
            for record in data
            if keep(record['question'].lower())
        )

    overall = Counter(record['answer'] for record in data)
    by_film = tally(lambda text: "film" in text or "type" in text)
    by_background = tally(lambda text: "background" in text)

    summary = {}
    summary["total_unique_answers"] = len(overall)
    summary["most_common_answers"] = overall.most_common(10)
    summary["film_types"] = dict(by_film)
    summary["background_types"] = dict(by_background)
    return summary

# Compute the answer-distribution summary for the loaded records and print it as a dict
analysis = analyze_answers(data)
print("Answer analysis:", analysis)

# Filter by video source, using case-insensitive substring checks on the filename.
# NOTE(review): "movie" / "tv" are matched anywhere in the name, so a filename
# containing both lands in both lists, and any name that merely contains the
# letters "tv" counts as TV — confirm this matches the dataset's naming scheme.
movie_videos = [item for item in data if "movie" in item['video'].lower()]
tv_videos = [item for item in data if "tv" in item['video'].lower()]
# Everything whose filename contains neither keyword
other_videos = [item for item in data if "movie" not in item['video'].lower()
                and "tv" not in item['video'].lower()]

print(f"Movie videos: {len(movie_videos)}")
print(f"TV videos: {len(tv_videos)}")
print(f"Other videos: {len(other_videos)}")

Related Pages

Page Connections

Double-click a node to navigate. Hold to expand connections.
Principle
Implementation
Heuristic
Environment