Implementation: FlagOpen FlagEmbedding MLVU Topic Reasoning Data
| Knowledge Sources | |
|---|---|
| Domains | Video Understanding, Reasoning, Topic Classification |
| Last Updated | 2026-02-09 00:00 GMT |
Overview
Benchmark dataset for topic identification and reasoning about video content themes and subjects.
Description
The MLVU Topic Reasoning dataset contains 3434 questions that test models' ability to identify and reason about high-level topics, themes, and subjects in videos. Questions focus on understanding the overall context, genre, setting, and main subjects of videos rather than specific details. This requires holistic video comprehension and the ability to abstract from specific events to general themes.
Questions typically ask about:
- Main background or setting of videos
- Genre or film type classification
- Primary subjects or themes
- Overall context and environment
- Color schemes and visual styles
This tests higher-level semantic understanding beyond simple object or action recognition.
Usage
Use this dataset for evaluating semantic video understanding, benchmarking genre classification capabilities, or training models on high-level video comprehension tasks.
Code Reference
Source Location
- Repository: FlagOpen_FlagEmbedding
- File: research/MLVU/data/7_topic_reasoning.json
Data Structure
{
"video": str, # Video filename
"duration": int, # Video duration in seconds
"question": str, # Question about topic/theme
"candidates": List[str], # Four candidate answers
"answer": str, # Correct answer
"question_type": str # Always "topic_reasoning"
}
Import
import json

# Load topic reasoning dataset.
# NOTE(review): the file has a .json extension, so it may be a single JSON
# array rather than JSON Lines. Parse the whole document first and fall
# back to line-by-line parsing (blank lines ignored) if that fails.
with open("research/MLVU/data/7_topic_reasoning.json", "r") as f:
    _raw = f.read()
try:
    topic_data = json.loads(_raw)
except json.JSONDecodeError:
    topic_data = [json.loads(line) for line in _raw.splitlines() if line.strip()]
I/O Contract
Inputs
| Name | Type | Required | Description |
|---|---|---|---|
| file_path | str | Yes | Path to the topic reasoning dataset JSON file |
Outputs
| Field | Type | Description |
|---|---|---|
| video | str | Video filename |
| duration | int | Video duration in seconds |
| question | str | Question about video topic/theme |
| candidates | List[str] | Four possible answers |
| answer | str | Correct answer |
| question_type | str | Type identifier ("topic_reasoning") |
Usage Examples
import json
from typing import List, Dict
from collections import Counter
# Load topic reasoning dataset
def load_topic_data(file_path: str) -> List[Dict]:
    """Load the topic reasoning dataset from disk.

    Supports both layouts the file may use: a single JSON array
    (standard for a ``.json`` file) or JSON Lines (one object per
    line). Blank lines are ignored in the JSONL case.

    Args:
        file_path: Path to the topic reasoning dataset JSON file.

    Returns:
        List of question entries (dicts with video / duration /
        question / candidates / answer / question_type fields).
    """
    with open(file_path, "r") as f:
        text = f.read()
    try:
        parsed = json.loads(text)
        # Whole-file JSON document: normally a list of entries;
        # wrap a lone object so callers always get a list.
        return parsed if isinstance(parsed, list) else [parsed]
    except json.JSONDecodeError:
        # Fall back to JSON Lines: one object per non-blank line.
        return [json.loads(line) for line in text.splitlines() if line.strip()]
# Load the dataset and preview its first record.
data = load_topic_data("research/MLVU/data/7_topic_reasoning.json")

sample = data[0]
for line in (
    f"Video: {sample['video']}",
    f"Duration: {sample['duration']}s",
    f"Question: {sample['question']}",
    f"Candidates: {sample['candidates']}",
    f"Answer: {sample['answer']}",
):
    print(line)
# Output:
# Video: AWA-6.mp4
# Duration: 450s
# Question: What is the main background of the video?
# Candidates: ['Grassland', 'Lake', 'Ocean', 'Desert']
# Answer: Grassland
# Categorize question types
def categorize_questions(data: List[Dict]) -> Dict[str, List[Dict]]:
    """Bucket dataset entries by keywords in their question text.

    Matching is case-insensitive and first-match-wins, checked in
    this order: background, film_type, color, theme. Entries that
    match no rule land in "other".
    """
    rules = [
        ("background", ("background",)),
        ("film_type", ("film", "type")),
        ("color", ("color",)),
        ("theme", ("theme", "main")),
    ]
    buckets: Dict[str, List[Dict]] = {name: [] for name, _ in rules}
    buckets["other"] = []
    for entry in data:
        text = entry['question'].lower()
        for name, keywords in rules:
            if any(kw in text for kw in keywords):
                buckets[name].append(entry)
                break
        else:
            buckets["other"].append(entry)
    return buckets
# Report how many questions fell into each bucket.
categories = categorize_questions(data)
print("Question categories:")
for bucket_name in categories:
    print(f" {bucket_name}: {len(categories[bucket_name])}")
# Evaluate topic reasoning
def evaluate_topic_reasoning(model, data: List[Dict]) -> Dict[str, float]:
    """Score a model on the topic reasoning benchmark.

    For each entry, asks the model to pick an answer from the
    candidates and compares it (exact string match) against the
    ground-truth answer.

    Args:
        model: Object exposing ``identify_topic(video_path, question,
            candidates)`` that returns one of the candidate strings.
        data: Dataset entries as loaded from the JSON file.

    Returns:
        Dict with "overall_acc" plus one "<category>_acc" key per
        question category observed (background / film_type / color /
        general). Returns ``{"overall_acc": 0.0}`` for an empty
        dataset instead of raising ZeroDivisionError.
    """
    def _category(question: str) -> str:
        # Mirrors the first-match-wins keyword buckets used elsewhere
        # in this page; unmatched questions count as "general".
        q = question.lower()
        if "background" in q:
            return "background"
        if "film" in q or "type" in q:
            return "film_type"
        if "color" in q:
            return "color"
        return "general"

    total = len(data)
    if total == 0:
        # Guard: the original divided by len(data) unconditionally.
        return {"overall_acc": 0.0}

    correct = 0
    # Track accuracy by question category.
    category_results: Dict[str, Dict[str, int]] = {}
    for item in data:
        # NOTE(review): assumes videos live under a local "videos/"
        # directory relative to the working directory — confirm.
        video_path = f"videos/{item['video']}"
        predicted = model.identify_topic(
            video_path,
            item['question'],
            item['candidates']
        )
        is_correct = (predicted == item['answer'])
        correct += is_correct

        stats = category_results.setdefault(
            _category(item['question']), {"correct": 0, "total": 0}
        )
        stats["total"] += 1
        stats["correct"] += is_correct

    # Per-category accuracy alongside the overall score.
    category_acc = {
        f"{cat}_acc": stats["correct"] / stats["total"]
        for cat, stats in category_results.items()
    }
    return {
        "overall_acc": correct / total,
        **category_acc
    }
# Analyze answer distributions
def analyze_answers(data: List[Dict]) -> Dict:
    """Summarize the distribution of correct answers.

    Reports the overall answer vocabulary plus per-keyword answer
    breakdowns for film-type and background questions.
    """
    def _matches(entry: Dict, *keywords: str) -> bool:
        text = entry['question'].lower()
        return any(kw in text for kw in keywords)

    overall = Counter(entry['answer'] for entry in data)
    film_counts = Counter(
        entry['answer'] for entry in data if _matches(entry, "film", "type")
    )
    background_counts = Counter(
        entry['answer'] for entry in data if _matches(entry, "background")
    )
    return {
        "total_unique_answers": len(overall),
        "most_common_answers": overall.most_common(10),
        "film_types": dict(film_counts),
        "background_types": dict(background_counts),
    }
# Summarize answer distributions for the whole dataset.
analysis = analyze_answers(data)
print("Answer analysis:", analysis)

# Filter by video source. An entry whose filename contains both
# "movie" and "tv" appears in both of those lists.
def _has(entry, token):
    return token in entry['video'].lower()

movie_videos = [e for e in data if _has(e, "movie")]
tv_videos = [e for e in data if _has(e, "tv")]
other_videos = [e for e in data if not _has(e, "movie") and not _has(e, "tv")]
print(f"Movie videos: {len(movie_videos)}")
print(f"TV videos: {len(tv_videos)}")
print(f"Other videos: {len(other_videos)}")