https://bugs.kde.org/show_bug.cgi?id=451524

--- Comment #3 from Jonathan Kamens <j...@kamens.us> ---
(In reply to caulier.gilles from comment #1)
> All metadata operation (read/write) are done by libexiv2 shared library, not
> digiKam directly.

I mean, yes, I get that, but isn't digiKam telling libexiv2 what data to write
into the file? Could it not use libexiv2 to read what's already in the file,
compare it to what is in the database, and only modify the file if there are
actual differences?

> Can you compare metadata from an image file where nothing is changed and
> where image is touched. You can use ExifTool for this task. This will allow
> to identify which information are touched in this case.

Yes, I could do that, but I think any solution to this problem that is
implemented needs to be more comprehensive than just "handle the fields that
some guy on the internet listed in a bug ticket."

To fix the 6,000 files in my collection that were needlessly modified last
night, I wrote this Python script to figure out which files were actually
substantively different. This will show you at the very least the
fields that were unnecessarily modified in _my_ case (note, in particular, the
`strip_ignored` and `fix_values` functions), but I can't claim that this covers
everything:

```
#!/usr/bin/env python3

# Calls exiftool on two files. Reads the results and does a semantic
# comparison. Displays any differences and exits with non-zero status if there
# are differences. Ignores exiftool output lines that I've empirically
# determined are not reflective of substantive changes.

import copy
import pprint
import re
import subprocess
import sys
import xml.etree.ElementTree as ET


def exiftool_get(path):
    """Run ``exiftool`` on *path* and return its report as a dict.

    Every output line is split at its first colon into a tag name and a
    value. Tags whose value is empty or the literal ``(none)`` are dropped,
    and lines that contain no colon at all are skipped. A tag reported once
    maps to its value string; a tag reported several times maps to a list of
    its values in output order.

    Args:
        path: The file to hand to exiftool.

    Returns:
        A dict mapping tag names to a string or a list of strings.

    Raises:
        subprocess.CalledProcessError: If exiftool exits with a non-zero
            status.
    """
    # exiftool's documented default output charset is UTF-8, so decoding as
    # US-ASCII fails on any non-ASCII metadata (names, places, captions).
    # 'backslashreplace' keeps a stray undecodable byte visible as \xNN
    # instead of aborting the whole comparison.
    result = subprocess.run(('exiftool', path), encoding='utf-8',
                            errors='backslashreplace',
                            capture_output=True, check=True)
    values = {}
    for line in result.stdout.strip().split('\n'):
        fields = re.split(r'\s*:\s*', line, maxsplit=1)
        if len(fields) != 2:
            # No "tag : value" separator (also the case for empty output).
            continue
        key, value = fields
        if (not value) or (value == '(none)'):
            continue
        if key in values:
            # Repeated tag: collect every occurrence in a list.
            if isinstance(values[key], list):
                values[key].append(value)
            else:
                values[key] = [values[key], value]
        else:
            values[key] = value
    return values


def strip_ignored(exif):
    """Return a new dict holding every entry of *exif* except ignored tags.

    The ignored tags are the ones the script treats as not reflecting a
    substantive change between two files (file-system bookkeeping, region
    dimension fields and the IPTC digest). The input dict is left untouched.
    """
    ignored = frozenset((
        'Directory',
        'File Modification Date/Time',
        'File Access Date/Time',
        'File Inode Change Date/Time',
        'File Permissions',
        'File Size',
        'Region Applied To Dimensions H',
        'Region Applied To Dimensions Unit',
        'Region Applied To Dimensions W',
        'Current IPTC Digest',
    ))
    kept = {}
    for tag, value in exif.items():
        if tag in ignored:
            continue
        kept[tag] = value
    return kept


def fix_values(exif):
    """Return a copy of *exif* with order-insensitive fields normalized.

    Keyword-style tags hold comma-separated items whose order carries no
    meaning, so each is replaced by a sorted tuple of its items. The
    ``Categories`` tag holds XML; the children of each top-level element are
    sorted by their text and the tree is re-serialized (as bytes, which is
    what ``ET.tostring`` returns by default).

    Args:
        exif: Dict of tag name to value. A value may be a string or, for a
            tag that was reported several times, a list of strings.

    Returns:
        A deep copy of *exif* with the fields above normalized; the argument
        itself is not modified.
    """
    exif = copy.deepcopy(exif)
    for k in ('Tags List', 'Subject', 'Catalog Sets', 'Last Keyword XMP',
              'Keywords', 'Hierarchical Subject'):
        if k in exif:
            raw = exif[k]
            # A tag seen more than once arrives as a list of strings; split
            # each occurrence and merge them so re.split never sees a list.
            chunks = raw if isinstance(raw, (list, tuple)) else [raw]
            exif[k] = tuple(sorted(
                item
                for chunk in chunks
                for item in re.split(r'\s*,\s*', chunk)))
    categories = exif.get('Categories')
    if isinstance(categories, (str, bytes)):
        try:
            root = ET.fromstring(categories)
        except ET.ParseError:
            # Not well-formed XML: leave the raw value so it is still
            # compared verbatim instead of aborting the whole run.
            pass
        else:
            for category in root:
                # An element without text has text None, which cannot be
                # ordered against str; sort it as the empty string.
                category[:] = sorted(
                    category, key=lambda child: child.text or '')
            exif['Categories'] = ET.tostring(root)
    return exif


def main():
    """Compare the exiftool reports of the two files named on the command line.

    Both reports are read with ``exiftool_get`` and passed through
    ``strip_ignored`` and ``fix_values`` before being compared. Tags present
    in only one file and tags whose values differ are pretty-printed.

    Returns:
        0 if no differences remain, 1 if there are differences, and 2 if the
        arguments are missing or exiftool could not be run successfully.
    """
    if len(sys.argv) < 3:
        print(f'Usage: {sys.argv[0]} FILE1 FILE2', file=sys.stderr)
        return 2
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    try:
        exif1 = exiftool_get(file1)
        exif2 = exiftool_get(file2)
    except subprocess.CalledProcessError as err:
        # exiftool's stderr is captured, so surface it here; exit status 2
        # keeps "could not compare" distinct from "files differ" (1).
        print((err.stderr or str(err)).strip(), file=sys.stderr)
        return 2
    except OSError as err:
        # Typically exiftool is not installed or not executable.
        print(f'Cannot run exiftool: {err}', file=sys.stderr)
        return 2
    exif1 = fix_values(strip_ignored(exif1))
    exif2 = fix_values(strip_ignored(exif2))
    only1 = {k: v for k, v in exif1.items() if k not in exif2}
    only2 = {k: v for k, v in exif2.items() if k not in exif1}
    different = {k: (v, exif2[k]) for k, v in exif1.items()
                 if k in exif2 and v != exif2[k]}
    if not (only1 or only2 or different):
        return 0
    if only1:
        print(f'Only in {file1}:')
        pprint.pprint(only1)
    if only2:
        print(f'Only in {file2}:')
        pprint.pprint(only2)
    if different:
        print('Different:')
        pprint.pprint(different)
    return 1


# Script entry point: run the comparison and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
```

-- 
You are receiving this mail because:
You are watching all bug changes.

Reply via email to