On Thu, Jan 02, 2025 at 03:58:31PM -0300, Fabiano Rosas wrote: > The analyze-migration script was seen failing in s390x in misterious > ways. It seems we're reaching the subsection constructor without any
It might be a good idea to add some debug lines indeed. Though are you sure it's from parsing a subsection? https://lore.kernel.org/all/20241219123213.GA692742@fedora/ ====8<==== stderr: Traceback (most recent call last): File "/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py", line 688, in <module> dump.read(dump_memory = args.memory) File "/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py", line 625, in read section.read() File "/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py", line 461, in read field['data'] = reader(field, self.file) File "/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py", line 434, in __init__ for field in self.desc['struct']['fields']: KeyError: 'fields' ====8<==== It reads to me that it's not in "if 'subsections' in self.desc['struct']" loop yet, instead it looks like a real STRUCT field in one of the device sections? If that's true, then... > fields, which would indicate an empty .subsection entry in the vmstate > definition. We don't have any of those, at least not without the > unmigratable flag set, so this should never happen. > > Add some debug statements so that we can see what's going on the next > time the issue happens. > > Signed-off-by: Fabiano Rosas <faro...@suse.de> > --- > scripts/analyze-migration.py | 33 +++++++++++++++++++++++++++------ > 1 file changed, 27 insertions(+), 6 deletions(-) > > diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py > index 8a254a5b6a..1dd98f1d5a 100755 > --- a/scripts/analyze-migration.py > +++ b/scripts/analyze-migration.py > @@ -429,6 +429,10 @@ def __init__(self, desc, file): > super(VMSDFieldStruct, self).__init__(desc, file) > self.data = collections.OrderedDict() > > + if 'fields' not in self.desc['struct']: > + raise Exception("No fields in subsection key=%s name=%s" % > + (self.section_key, self.vmsd_name)) ... this self.section_key / self.vmsd_name may not exist.. In all cases, IMHO this would be better the debug lines work with both pure structs and sections. > + > # When we see compressed array elements, unfold them here > new_fields = [] > for field in self.desc['struct']['fields']: > @@ -477,6 +481,11 @@ def read(self): > raise Exception("Subsection %s not found at offset %x" % > ( subsection['vmsd_name'], self.file.tell())) > name = self.file.readstr() > version_id = self.file.read32() > + > + if not subsection: > + raise Exception("Empty description for subsection %s" % > + name) This is checking subsection==None?? I doubt whether this will hit.. > + > self.data[name] = VMSDSection(self.file, version_id, > subsection, (name, 0)) > self.data[name].read() > > @@ -575,9 +584,8 @@ def __init__(self, filename): > self.filename = filename > self.vmsd_desc = None > > - def read(self, desc_only = False, dump_memory = False, write_memory = > False): > - # Read in the whole file > - file = MigrationFile(self.filename) > + def _read(self, file, vmsd_json, desc_only = False, dump_memory = False, > + write_memory = False): > > # File magic > data = file.read32() > @@ -589,7 +597,7 @@ def read(self, desc_only = False, dump_memory = False, > write_memory = False): > if data != self.QEMU_VM_FILE_VERSION: > raise Exception("Invalid version number %d" % data) > > - self.load_vmsd_json(file) > + self.load_vmsd_json(file, vmsd_json) > > # Read sections > self.sections = collections.OrderedDict() > @@ -632,12 +640,25 @@ def read(self, desc_only = False, dump_memory = False, > write_memory = False): > raise Exception("Mismatched section footer: %x vs %x" % > (read_section_id, section_id)) > else: > raise Exception("Unknown section type: %d" % section_type) > - file.close() > > - def load_vmsd_json(self, file): > + def read(self, desc_only = False, dump_memory = False, > + write_memory = False): > + file = MigrationFile(self.filename) > vmsd_json = file.read_migration_debug_json() > + > + try: > + self._read(file, vmsd_json, desc_only, dump_memory, write_memory) > + except: > + raise Exception("Full JSON dump:\n%s", vmsd_json) Better dump the Exception line itself too? IIUC that needs things like: except Exception as e: raise Exception(XXX, e, vmsd_json) More below.. > + finally: > + file.close() > + > + def load_vmsd_json(self, file, vmsd_json): > self.vmsd_desc = json.loads(vmsd_json, > object_pairs_hook=collections.OrderedDict) > for device in self.vmsd_desc['devices']: > + if 'fields' not in device: > + raise Exception("vmstate for device %s has no fields" % > + device['name']) This looks a valid check, though I still doubt whether this would hit at all for this specific error (which looks like a top level STRUCT of a section, that is missing "fields"). > key = (device['name'], device['instance_id']) > value = ( VMSDSection, device ) > self.section_classes[key] = value > -- > 2.35.3 > For the capture part, would it be easier if we trap at the very top level once and for all, then try to dump the vmsd_desc as long as existed? Like this: ====8<==== @@ -685,9 +686,15 @@ def default(self, o): f.close() elif args.dump == "state": dump = MigrationDump(args.file) - dump.read(dump_memory = args.memory) - dict = dump.getDict() - print(jsonenc.encode(dict)) + try: + dump.read(dump_memory = args.memory) + dict = dump.getDict() + print(jsonenc.encode(dict)) + except Exception as e: + # If loading vmstate stream failed, try the best to dump vmsd desc + raise Exception( + f"Caught exception when reading dump: {e}\n" + f"Trying to dump vmsd_desc: {jsonenc.encode(dump.vmsd_desc)}") elif args.dump == "desc": dump = MigrationDump(args.file) dump.read(desc_only = True) ====8<==== So no matter what caused error, fallback to try dump vmsd_desc as long as available. Thanks, -- Peter Xu