Examples and Use Cases

This page showcases real-world examples of using epub-utils for various tasks. Each example includes both CLI and Python API approaches where applicable.

Digital Library Management

Cataloging Your EPUB Collection

Scenario: You have a large collection of EPUB files and want to create a comprehensive catalog.

CLI Approach:

#!/bin/bash
# catalog-epubs.sh - Create a catalog of all EPUB files

echo "Creating EPUB catalog..."
echo "File,Title,Author,Publisher,Language,Year,Files,Size" > epub_catalog.csv

find . -name "*.epub" -type f | while read -r epub; do
    echo "Processing: $epub"

    # Extract metadata using epub-utils
    if metadata=$(epub-utils "$epub" metadata --format kv 2>/dev/null); then
        title=$(echo "$metadata" | grep "^title:" | cut -d' ' -f2- | sed 's/,/;/g')
        author=$(echo "$metadata" | grep "^creator:" | cut -d' ' -f2- | sed 's/,/;/g')
        publisher=$(echo "$metadata" | grep "^publisher:" | cut -d' ' -f2- | sed 's/,/;/g')
        language=$(echo "$metadata" | grep "^language:" | cut -d' ' -f2-)
        year=$(echo "$metadata" | grep "^date:" | cut -d' ' -f2- | cut -d'-' -f1)

        # Count files and get size
        file_count=$(epub-utils "$epub" files --format raw 2>/dev/null | wc -l)
        size=$(stat -f%z "$epub" 2>/dev/null || stat -c%s "$epub" 2>/dev/null)

        echo "$epub,$title,$author,$publisher,$language,$year,$file_count,$size" >> epub_catalog.csv
    else
        echo "$epub,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR" >> epub_catalog.csv
    fi
done

echo "Catalog complete! See epub_catalog.csv"

Python Approach:

import csv
from pathlib import Path
from epub_utils import Document

def create_epub_catalog(directory, output_file="epub_catalog.csv"):
    """Create a comprehensive catalog of EPUB files."""

    fieldnames = [
        'filepath', 'filename', 'title', 'author', 'publisher',
        'language', 'year', 'isbn', 'file_count', 'size_bytes', 'size_mb'
    ]

    epub_files = list(Path(directory).rglob("*.epub"))
    print(f"Found {len(epub_files)} EPUB files")

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for i, epub_path in enumerate(epub_files, 1):
            print(f"Processing {i}/{len(epub_files)}: {epub_path.name}")

            try:
                doc = Document(str(epub_path))
                metadata = doc.package.metadata

                # Extract date year
                date_str = getattr(metadata, 'date', '')
                year = date_str.split('-')[0] if date_str else ''

                # Get file size
                size_bytes = epub_path.stat().st_size
                size_mb = round(size_bytes / (1024 * 1024), 2)

                row = {
                    'filepath': str(epub_path),
                    'filename': epub_path.name,
                    'title': getattr(metadata, 'title', ''),
                    'author': getattr(metadata, 'creator', ''),
                    'publisher': getattr(metadata, 'publisher', ''),
                    'language': getattr(metadata, 'language', ''),
                    'year': year,
                    'isbn': getattr(metadata, 'identifier', ''),
                    'file_count': len(doc.get_files_info()),
                    'size_bytes': size_bytes,
                    'size_mb': size_mb
                }

                writer.writerow(row)

            except Exception as e:
                print(f"  Error: {e}")
                # Write error row so the failure is visible in the catalog
                size_bytes = epub_path.stat().st_size
                writer.writerow({
                    'filepath': str(epub_path),
                    'filename': epub_path.name,
                    'title': f'ERROR: {e}',
                    'author': '',
                    'publisher': '',
                    'language': '',
                    'year': '',
                    'isbn': '',
                    'file_count': 0,
                    'size_bytes': size_bytes,
                    'size_mb': round(size_bytes / (1024 * 1024), 2)
                })

# Usage
create_epub_catalog("/path/to/your/epub/collection")

Quality Assurance and Validation

EPUB Health Check

Scenario: Validate EPUB files and identify potential issues.

from epub_utils import Document, ParseError
import zipfile
from pathlib import Path

class EPUBHealthChecker:
    def __init__(self):
        self.issues = []

    def check_epub(self, epub_path):
        """Comprehensive EPUB health check."""
        self.issues = []
        epub_path = Path(epub_path)

        print(f"Checking EPUB: {epub_path.name}")

        # Basic file checks
        if not epub_path.exists():
            self.issues.append("File does not exist")
            return self.get_report()

        if epub_path.stat().st_size == 0:
            self.issues.append("File is empty")
            return self.get_report()

        # ZIP integrity check
        try:
            with zipfile.ZipFile(epub_path, 'r') as zf:
                # testzip() returns the name of the first bad file, or None
                first_bad = zf.testzip()
                if first_bad:
                    self.issues.append(f"Corrupt file in archive: {first_bad}")
        except zipfile.BadZipFile:
            self.issues.append("Invalid ZIP file")
            return self.get_report()

        # EPUB structure checks
        try:
            doc = Document(str(epub_path))
            self._check_container(doc)
            self._check_package(doc)
            self._check_metadata(doc)
            self._check_manifest(doc)
            self._check_files(doc)

        except ParseError as e:
            self.issues.append(f"Parse error: {e}")
        except Exception as e:
            self.issues.append(f"Unexpected error: {e}")

        return self.get_report()

    def _check_container(self, doc):
        """Check container structure."""
        try:
            container = doc.container
            if not container.rootfile_path:
                self.issues.append("No rootfile specified in container")
        except Exception as e:
            self.issues.append(f"Container error: {e}")

    def _check_package(self, doc):
        """Check package/OPF file."""
        try:
            package = doc.package
            if not hasattr(package, 'metadata'):
                self.issues.append("Package missing metadata")
            if not hasattr(package, 'manifest'):
                self.issues.append("Package missing manifest")
            if not hasattr(package, 'spine'):
                self.issues.append("Package missing spine")
        except Exception as e:
            self.issues.append(f"Package error: {e}")

    def _check_metadata(self, doc):
        """Check metadata quality."""
        try:
            metadata = doc.package.metadata

            # Check required fields
            if not getattr(metadata, 'title', '').strip():
                self.issues.append("Missing or empty title")
            if not getattr(metadata, 'language', '').strip():
                self.issues.append("Missing or empty language")
            if not getattr(metadata, 'identifier', '').strip():
                self.issues.append("Missing or empty identifier")

        except Exception as e:
            self.issues.append(f"Metadata error: {e}")

    def _check_manifest(self, doc):
        """Check manifest integrity."""
        try:
            manifest = doc.package.manifest
            if not manifest.items:
                self.issues.append("Empty manifest")

            # Check for common content types
            has_html = any(
                item.get('media-type') == 'application/xhtml+xml'
                for item in manifest.items.values()
            )
            if not has_html:
                self.issues.append("No XHTML content files found")

        except Exception as e:
            self.issues.append(f"Manifest error: {e}")

    def _check_files(self, doc):
        """Check file structure."""
        try:
            files_info = doc.get_files_info()
            if len(files_info) < 3:  # At least container, package, and one content file
                self.issues.append("Very few files in EPUB (possibly incomplete)")

            # Check for suspiciously large files
            for file_info in files_info:
                if file_info['size'] > 10 * 1024 * 1024:  # 10MB
                    self.issues.append(f"Large file found: {file_info['path']} ({file_info['size']} bytes)")

        except Exception as e:
            self.issues.append(f"File check error: {e}")

    def get_report(self):
        """Generate health check report."""
        if not self.issues:
            return {"status": "healthy", "issues": []}
        else:
            return {"status": "issues_found", "issues": self.issues}

# Usage
checker = EPUBHealthChecker()
report = checker.check_epub("book.epub")

if report["status"] == "healthy":
    print("✓ EPUB is healthy!")
else:
    print("⚠ Issues found:")
    for issue in report["issues"]:
        print(f"  - {issue}")

Metadata Management

Standardizing Metadata

Scenario: Clean and standardize metadata across your EPUB collection.

import re
from epub_utils import Document

class MetadataStandardizer:
    def __init__(self):
        self.language_codes = {
            'english': 'en',
            'spanish': 'es',
            'french': 'fr',
            'german': 'de',
            'italian': 'it'
            # Add more as needed
        }

    def analyze_metadata(self, epub_path):
        """Analyze and suggest metadata improvements."""
        doc = Document(epub_path)
        metadata = doc.package.metadata
        suggestions = []

        # Check title
        title = getattr(metadata, 'title', '')
        if not title:
            suggestions.append("Missing title")
        elif len(title) > 200:
            suggestions.append("Title is very long (>200 chars)")
        elif title.isupper():
            suggestions.append("Title is all uppercase - consider title case")

        # Check author
        creator = getattr(metadata, 'creator', '')
        if not creator:
            suggestions.append("Missing author/creator")
        elif ',' not in creator and len(creator.split()) > 2:
            suggestions.append("Author name might need reformatting (Last, First)")

        # Check language
        language = getattr(metadata, 'language', '')
        if not language:
            suggestions.append("Missing language code")
        elif len(language) > 3:
            # Might be full language name instead of code
            lang_lower = language.lower()
            if lang_lower in self.language_codes:
                suggestions.append(f"Use language code '{self.language_codes[lang_lower]}' instead of '{language}'")

        # Check identifier
        identifier = getattr(metadata, 'identifier', '')
        if not identifier:
            suggestions.append("Missing identifier")
        elif not self._is_valid_identifier(identifier):
            suggestions.append("Identifier format might be invalid")

        # Check date format
        date = getattr(metadata, 'date', '')
        if date and not re.fullmatch(r'\d{4}(-\d{2}(-\d{2})?)?', date):
            suggestions.append("Date should be in YYYY, YYYY-MM, or YYYY-MM-DD format")

        return {
            'file': epub_path,
            'current_metadata': {
                'title': title,
                'creator': creator,
                'language': language,
                'identifier': identifier,
                'date': date
            },
            'suggestions': suggestions
        }

    def _is_valid_identifier(self, identifier):
        """Check if identifier looks valid."""
        # Check for ISBN, DOI, UUID patterns
        patterns = [
            r'urn:isbn:(\d{13}|\d{9}[\dXx])$',  # ISBN URN (13 digits, or ISBN-10 with optional X check digit)
            r'isbn:(\d{13}|\d{9}[\dXx])$',      # Simple ISBN
            r'urn:uuid:[a-f0-9-]{36}$',         # UUID URN
            r'doi:10\.\d+/.+',                  # DOI
            r'urn:doi:10\.\d+/.+'               # DOI URN
        ]

        return any(re.match(pattern, identifier, re.I) for pattern in patterns)

# Usage
standardizer = MetadataStandardizer()
analysis = standardizer.analyze_metadata("book.epub")

print(f"Analyzing: {analysis['file']}")
if analysis['suggestions']:
    print("Suggestions for improvement:")
    for suggestion in analysis['suggestions']:
        print(f"  - {suggestion}")
else:
    print("Metadata looks good!")

Content Analysis and Statistics

Reading Level Analysis

Scenario: Analyze EPUB content to determine reading complexity.

import re
from posixpath import dirname, join
from epub_utils import Document

class ReadingLevelAnalyzer:
    def analyze_epub(self, epub_path):
        """Analyze reading level of an EPUB."""
        doc = Document(epub_path)

        # Get all text content
        all_text = self._extract_all_text(doc)

        if not all_text.strip():
            return {"error": "No readable text found"}

        # Calculate statistics
        stats = self._calculate_text_stats(all_text)

        # Calculate reading level scores
        flesch_score = self._flesch_reading_ease(stats)
        flesch_grade = self._flesch_kincaid_grade(stats)

        return {
            'title': getattr(doc.package.metadata, 'title', 'Unknown'),
            'word_count': stats['words'],
            'sentence_count': stats['sentences'],
            'syllable_count': stats['syllables'],
            'avg_words_per_sentence': round(stats['words'] / stats['sentences'], 2),
            'avg_syllables_per_word': round(stats['syllables'] / stats['words'], 2),
            'flesch_reading_ease': round(flesch_score, 2),
            'flesch_kincaid_grade': round(flesch_grade, 2),
            'reading_level': self._interpret_flesch_score(flesch_score)
        }

    def _extract_all_text(self, doc):
        """Extract readable text from the EPUB's XHTML content files."""
        # Sketch: resolves manifest hrefs against the package document's
        # directory (assumes manifest items expose an 'href' key alongside
        # 'media-type')
        texts = []
        try:
            base = dirname(doc.container.rootfile_path)
            for item in doc.package.manifest.items.values():
                if item.get('media-type') != 'application/xhtml+xml':
                    continue
                try:
                    content = doc.get_file_by_path(join(base, item.get('href', '')))
                    if hasattr(content, 'to_plain'):
                        texts.append(content.to_plain())
                except ValueError:
                    continue  # Manifest entry with no matching file in the archive
        except Exception:
            pass
        return ' '.join(texts)

    def _calculate_text_stats(self, text):
        """Calculate basic text statistics."""
        # Clean text
        text = re.sub(r'[^\w\s\.\!\?]', '', text)

        # Count words
        words = len(text.split())

        # Count sentences
        sentences = len(re.findall(r'[.!?]+', text))
        if sentences == 0:
            sentences = 1  # Avoid division by zero

        # Count syllables (simplified)
        syllables = self._count_syllables(text)

        return {
            'words': words,
            'sentences': sentences,
            'syllables': syllables
        }

    def _count_syllables(self, text):
        """Simplified syllable counting."""
        words = text.lower().split()
        syllable_count = 0

        for word in words:
            word = re.sub(r'[^a-z]', '', word)
            if word:
                # Simple syllable counting heuristic
                vowels = 'aeiouy'
                syllables = sum(1 for i, char in enumerate(word)
                              if char in vowels and (i == 0 or word[i-1] not in vowels))
                if word.endswith('e') and syllables > 1:
                    syllables -= 1
                syllable_count += max(1, syllables)

        return syllable_count

    def _flesch_reading_ease(self, stats):
        """Calculate Flesch Reading Ease score."""
        return (206.835 -
                (1.015 * (stats['words'] / stats['sentences'])) -
                (84.6 * (stats['syllables'] / stats['words'])))

    def _flesch_kincaid_grade(self, stats):
        """Calculate Flesch-Kincaid Grade Level."""
        return ((0.39 * (stats['words'] / stats['sentences'])) +
                (11.8 * (stats['syllables'] / stats['words'])) - 15.59)

    def _interpret_flesch_score(self, score):
        """Interpret Flesch Reading Ease score."""
        if score >= 90:
            return "Very Easy (5th grade)"
        elif score >= 80:
            return "Easy (6th grade)"
        elif score >= 70:
            return "Fairly Easy (7th grade)"
        elif score >= 60:
            return "Standard (8th-9th grade)"
        elif score >= 50:
            return "Fairly Difficult (10th-12th grade)"
        elif score >= 30:
            return "Difficult (College level)"
        else:
            return "Very Difficult (Graduate level)"

# Usage
analyzer = ReadingLevelAnalyzer()
analysis = analyzer.analyze_epub("book.epub")

print(f"Reading Level Analysis for: {analysis['title']}")
print(f"Word Count: {analysis['word_count']:,}")
print(f"Reading Level: {analysis['reading_level']}")
print(f"Flesch-Kincaid Grade: {analysis['flesch_kincaid_grade']}")

Direct File Access and Extraction

Scenario: Extract specific files from EPUB archives for processing or analysis.

CLI Approach:

#!/bin/bash
# extract-epub-assets.sh - Extract and process EPUB content files

epub_file="$1"
output_dir="extracted_content"

mkdir -p "$output_dir"

echo "Extracting content from: $epub_file"

# Get list of all XHTML content files
epub-utils "$epub_file" files --format raw | grep '\.xhtml$' | while read -r file_path; do
    echo "Processing: $file_path"

    # Extract plain text content
    safe_name=$(echo "$file_path" | tr '/' '_')
    epub-utils "$epub_file" files "$file_path" --format plain > "$output_dir/${safe_name}.txt"

    # Extract styled HTML content
    epub-utils "$epub_file" files "$file_path" --format raw > "$output_dir/${safe_name}.html"
done

# Extract CSS files for styling reference
epub-utils "$epub_file" files --format raw | grep '\.css$' | while read -r css_path; do
    echo "Extracting CSS: $css_path"
    safe_name=$(echo "$css_path" | tr '/' '_')
    epub-utils "$epub_file" files "$css_path" > "$output_dir/${safe_name}"
done

echo "Extraction complete! Files saved to $output_dir/"

Comparing files vs content commands:

# Using files command (direct path access)
epub-utils book.epub files OEBPS/chapter1.xhtml --format plain
epub-utils book.epub files OEBPS/styles/main.css
epub-utils book.epub files META-INF/container.xml

# Using content command (requires manifest item ID)
epub-utils book.epub manifest | grep chapter1  # Find the ID first
epub-utils book.epub content chapter1-id --format plain

Key advantages of the files command:

  • Direct access: Use actual file paths without needing manifest IDs

  • Universal file access: Access any file type (XHTML, CSS, XML, images, etc.)

  • Simpler automation: No need to parse manifest to find item IDs

  • Better for file-system-based workflows: Mirrors actual EPUB structure

Python equivalent using API:

from epub_utils import Document

def extract_file_content(epub_path, file_path):
    """Extract content from a specific file in EPUB."""
    doc = Document(epub_path)

    try:
        content = doc.get_file_by_path(file_path)

        # Handle different content types
        if hasattr(content, 'to_plain'):
            # XHTML content - can extract plain text
            return {
                'raw_html': content.to_str(),
                'plain_text': content.to_plain(),
                'formatted_xml': content.to_xml(pretty_print=True)
            }
        else:
            # Other file types (CSS, XML, etc.)
            return {'raw_content': content}

    except ValueError as e:
        return {'error': str(e)}

# Usage

# Extract chapter content
chapter_content = extract_file_content("book.epub", "OEBPS/chapter1.xhtml")
if 'plain_text' in chapter_content:
    print(f"Chapter text: {chapter_content['plain_text'][:200]}...")

# Extract CSS for styling analysis
css_content = extract_file_content("book.epub", "OEBPS/styles/main.css")
if 'raw_content' in css_content:
    rule_count = css_content['raw_content'].count('{')  # one opening brace per rule block
    print(f"CSS: {rule_count} rule blocks found")

Automation and Workflows

Automated EPUB Processing Pipeline

Scenario: Set up an automated pipeline for processing new EPUB files.

import json
import re
import shutil
from pathlib import Path
from datetime import datetime
from epub_utils import Document

class EPUBProcessor:
    def __init__(self, input_dir, output_dir, processed_dir):
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        self.processed_dir = Path(processed_dir)

        # Create directories if they don't exist
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.processed_dir.mkdir(parents=True, exist_ok=True)

    def process_new_files(self):
        """Process all new EPUB files in input directory."""
        epub_files = list(self.input_dir.glob("*.epub"))

        if not epub_files:
            print("No EPUB files found to process")
            return

        print(f"Found {len(epub_files)} EPUB files to process")

        results = []
        for epub_path in epub_files:
            result = self.process_single_file(epub_path)
            results.append(result)

        # Generate processing report
        self.generate_report(results)

        return results

    def process_single_file(self, epub_path):
        """Process a single EPUB file."""
        print(f"Processing: {epub_path.name}")

        try:
            doc = Document(str(epub_path))

            # Extract metadata
            metadata = self.extract_metadata(doc)

            # Validate file
            validation_result = self.validate_epub(doc)

            # Generate file info
            file_info = self.generate_file_info(epub_path, doc)

            # Create organized filename
            new_filename = self.create_organized_filename(metadata)

            # Move file to organized location
            organized_path = self.organize_file(epub_path, new_filename, metadata)

            result = {
                'original_path': str(epub_path),
                'new_path': str(organized_path),
                'status': 'success',
                'metadata': metadata,
                'validation': validation_result,
                'file_info': file_info,
                'processed_at': datetime.now().isoformat()
            }

            # Move original to processed directory
            processed_path = self.processed_dir / epub_path.name
            shutil.move(str(epub_path), str(processed_path))

            return result

        except Exception as e:
            result = {
                'original_path': str(epub_path),
                'status': 'error',
                'error': str(e),
                'processed_at': datetime.now().isoformat()
            }

            # Move problematic file to processed directory
            processed_path = self.processed_dir / f"ERROR_{epub_path.name}"
            shutil.move(str(epub_path), str(processed_path))

            return result

    def extract_metadata(self, doc):
        """Extract standardized metadata."""
        metadata = doc.package.metadata

        return {
            'title': getattr(metadata, 'title', '').strip(),
            'author': getattr(metadata, 'creator', '').strip(),
            'publisher': getattr(metadata, 'publisher', '').strip(),
            'language': getattr(metadata, 'language', '').strip(),
            'year': self.extract_year(getattr(metadata, 'date', '')),
            'identifier': getattr(metadata, 'identifier', '').strip(),
            'subject': getattr(metadata, 'subject', '').strip()
        }

    def extract_year(self, date_str):
        """Extract year from date string."""
        if not date_str:
            return ''
        return date_str.split('-')[0] if '-' in date_str else date_str[:4]

    def validate_epub(self, doc):
        """Basic EPUB validation."""
        issues = []

        try:
            metadata = doc.package.metadata

            if not getattr(metadata, 'title', '').strip():
                issues.append('Missing title')
            if not getattr(metadata, 'creator', '').strip():
                issues.append('Missing author')
            if not getattr(metadata, 'language', '').strip():
                issues.append('Missing language')

            # Check for content
            manifest = doc.package.manifest
            has_content = any(
                item.get('media-type') == 'application/xhtml+xml'
                for item in manifest.items.values()
            )

            if not has_content:
                issues.append('No content files found')

        except Exception as e:
            issues.append(f'Validation error: {e}')

        return {
            'is_valid': len(issues) == 0,
            'issues': issues
        }

    def generate_file_info(self, epub_path, doc):
        """Generate file information."""
        stat = epub_path.stat()

        return {
            'filename': epub_path.name,
            'size_bytes': stat.st_size,
            'size_mb': round(stat.st_size / (1024 * 1024), 2),
            'file_count': len(doc.get_files_info()),
            'modified': datetime.fromtimestamp(stat.st_mtime).isoformat()
        }

    def create_organized_filename(self, metadata):
        """Create an organized filename from metadata."""
        # Clean strings for filename
        def clean_for_filename(s):
            return re.sub(r'[^\w\s-]', '', s).strip()[:50]

        author = clean_for_filename(metadata['author'] or 'Unknown_Author')
        title = clean_for_filename(metadata['title'] or 'Unknown_Title')
        year = metadata['year'] or 'Unknown_Year'

        return f"{author} - {title} ({year}).epub"

    def organize_file(self, epub_path, new_filename, metadata):
        """Organize file into structured directory."""
        # Sanitize the author for use as a directory name; raw metadata
        # may contain path separators or other invalid characters
        safe_author = re.sub(r'[^\w\s-]', '', metadata['author'] or '').strip()[:50]
        author_dir = self.output_dir / (safe_author or 'Unknown_Author')
        author_dir.mkdir(exist_ok=True)

        # Create final path
        final_path = author_dir / new_filename

        # Copy file to organized location
        shutil.copy2(str(epub_path), str(final_path))

        return final_path

    def generate_report(self, results):
        """Generate processing report."""
        report_path = self.output_dir / f"processing_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        summary = {
            'total_files': len(results),
            'successful': len([r for r in results if r['status'] == 'success']),
            'errors': len([r for r in results if r['status'] == 'error']),
            'generated_at': datetime.now().isoformat(),
            'results': results
        }

        with open(report_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

        print(f"Processing complete!")
        print(f"Successfully processed: {summary['successful']}")
        print(f"Errors: {summary['errors']}")
        print(f"Report saved to: {report_path}")

# Usage
processor = EPUBProcessor(
    input_dir="/path/to/new/epubs",
    output_dir="/path/to/organized/library",
    processed_dir="/path/to/processed/files"
)

results = processor.process_new_files()
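
To run the pipeline unattended, wrap the snippet above in a script and schedule it with cron. The paths below are hypothetical; adjust them to your setup:

# m h dom mon dow  command
0 * * * * /usr/bin/python3 /opt/scripts/run_epub_pipeline.py >> /var/log/epub_pipeline.log 2>&1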

Command-Line Power User Examples

Advanced Shell Scripts

Complex metadata extraction with error handling:

#!/bin/bash
# advanced-epub-analysis.sh

set -euo pipefail

EPUB_DIR="${1:-./}"
OUTPUT_FILE="detailed_analysis.json"

echo "Starting advanced EPUB analysis..."
echo "Directory: $EPUB_DIR"
echo "Output: $OUTPUT_FILE"

# Initialize JSON output
echo '{"analysis_date": "'$(date -Iseconds)'", "epubs": [' > "$OUTPUT_FILE"

first=true
find "$EPUB_DIR" -name "*.epub" -type f | while read -r epub; do
    echo "Analyzing: $(basename "$epub")"

    if [ "$first" = true ]; then
        first=false
    else
        echo "," >> "$OUTPUT_FILE"
    fi

    # Start JSON object for this EPUB
    echo '  {' >> "$OUTPUT_FILE"
    echo "    \"file\": \"$epub\"," >> "$OUTPUT_FILE"

    # Extract metadata with error handling
    if metadata=$(epub-utils "$epub" metadata --format kv 2>/dev/null); then
        echo "    \"metadata\": {" >> "$OUTPUT_FILE"

        # Parse metadata into JSON; write to stdout so sed can strip the
        # trailing comma before appending to the file (values containing
        # double quotes would still need escaping for strict JSON)
        echo "$metadata" | while IFS=': ' read -r key value; do
            if [ -n "$key" ] && [ -n "$value" ]; then
                echo "      \"$key\": \"$value\","
            fi
        done | sed '$s/,$//' >> "$OUTPUT_FILE"

        echo "    }," >> "$OUTPUT_FILE"
    else
        echo "    \"metadata\": null," >> "$OUTPUT_FILE"
        echo "    \"metadata_error\": true," >> "$OUTPUT_FILE"
    fi

    # File analysis
    if file_info=$(epub-utils "$epub" files --format raw 2>/dev/null); then
        file_count=$(echo "$file_info" | wc -l)
        echo "    \"file_count\": $file_count," >> "$OUTPUT_FILE"
    else
        echo "    \"file_count\": null," >> "$OUTPUT_FILE"
    fi

    # File size
    size=$(stat -f%z "$epub" 2>/dev/null || stat -c%s "$epub" 2>/dev/null || echo "0")
    echo "    \"size_bytes\": $size," >> "$OUTPUT_FILE"

    # Validation check
    if epub-utils "$epub" container >/dev/null 2>&1 && \
       epub-utils "$epub" package >/dev/null 2>&1; then
        echo "    \"is_valid\": true" >> "$OUTPUT_FILE"
    else
        echo "    \"is_valid\": false" >> "$OUTPUT_FILE"
    fi

    echo "  }" >> "$OUTPUT_FILE"
done

# Close JSON
echo "]}" >> "$OUTPUT_FILE"

echo "Analysis complete! Results in $OUTPUT_FILE"

Batch processing with parallel execution:

#!/bin/bash
# parallel-epub-check.sh

EPUB_DIR="${1:-./}"
MAX_JOBS=4

check_single_epub() {
    epub="$1"
    base=$(basename "$epub")

    echo "[$base] Starting check..."

    # Quick validation
    if ! epub-utils "$epub" container >/dev/null 2>&1; then
        echo "[$base] ❌ Invalid container"
        return 1
    fi

    if ! epub-utils "$epub" package >/dev/null 2>&1; then
        echo "[$base] ❌ Invalid package"
        return 1
    fi

    # Check for required metadata
    metadata=$(epub-utils "$epub" metadata --format kv 2>/dev/null)

    if ! echo "$metadata" | grep -q "^title:"; then
        echo "[$base] ⚠️  Missing title"
    fi

    if ! echo "$metadata" | grep -q "^creator:"; then
        echo "[$base] ⚠️  Missing author"
    fi

    echo "[$base] ✅ Check complete"
}

# Export after definition so the xargs-spawned bash shells inherit it
export -f check_single_epub

# Run parallel checks (null-delimited paths survive spaces in filenames)
find "$EPUB_DIR" -name "*.epub" -type f -print0 | \
xargs -0 -P "$MAX_JOBS" -I {} bash -c 'check_single_epub "$@"' _ {}