commit:     e6fcf51f824bd476bec6106b7ad478e73be34cc5
Author:     Jauhien Piatlicki <jauhien <AT> gentoo <DOT> org>
AuthorDate: Thu Apr 16 22:23:16 2015 +0000
Commit:     Jauhien Piatlicki <jauhien <AT> gentoo <DOT> org>
CommitDate: Thu Apr 16 22:23:16 2015 +0000
URL:        https://gitweb.gentoo.org/proj/g-sorcery.git/commit/?id=e6fcf51f

change DB structure

move to new DB layout with versioning
support bson format for category files

 g_sorcery/bson/__init__.py |   1 +
 g_sorcery/bson/bson.py     |  47 +++++
 g_sorcery/db_layout.py     | 283 ++++++++++++++++++++++++++++
 g_sorcery/exceptions.py    |  12 +-
 g_sorcery/fileutils.py     |  62 ++++--
 g_sorcery/package_db.py    | 455 +++++++++++++++++++++------------------------
 g_sorcery/serialization.py |  98 ++++++++--
 gs_db_tool/gs_db_tool.py   |  10 +-
 scripts/all_pythons.sh     |   2 +-
 setup.py                   |  29 ++-
 tests/test_PackageDB.py    |  10 +-
 11 files changed, 707 insertions(+), 302 deletions(-)

diff --git a/g_sorcery/bson/__init__.py b/g_sorcery/bson/__init__.py
new file mode 100644
index 0000000..4265cc3
--- /dev/null
+++ b/g_sorcery/bson/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python

diff --git a/g_sorcery/bson/bson.py b/g_sorcery/bson/bson.py
new file mode 100644
index 0000000..fdb8bb9
--- /dev/null
+++ b/g_sorcery/bson/bson.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+    bson.py
+    ~~~~~~~
+
+    bson file format support
+
+    :copyright: (c) 2015 by Jauhien Piatlicki
+    :license: GPL-2, see LICENSE for more details.
+"""
+
+import bson
+
+from g_sorcery.exceptions import FileJSONError
+from g_sorcery.fileutils import FileJSONData
+from g_sorcery.serialization import from_raw_serializable, to_raw_serializable
+
+class FileBSON(FileJSONData):
+    """
+    Class for BSON files. Supports custom JSON serialization
+    provided by g_sorcery.serialization.
+    """
+    def read_content(self):
+        """
+        Read BSON file.
+        """
+        content = {}
+        bcnt = None
+        with open(self.path, 'rb') as f:
+            bcnt = f.read()
+        if not bcnt:
+            raise FileJSONError('failed to read: ', self.path)
+        rawcnt = bson.BSON.decode(bcnt)
+        content = from_raw_serializable(rawcnt)
+        return content
+
+
+    def write_content(self, content):
+        """
+        Write BSON file.
+        """
+        rawcnt = to_raw_serializable(content)
+        bcnt = bson.BSON.encode(rawcnt)
+        with open(self.path, 'wb') as f:
+            f.write(bcnt)

diff --git a/g_sorcery/db_layout.py b/g_sorcery/db_layout.py
new file mode 100644
index 0000000..fe6f281
--- /dev/null
+++ b/g_sorcery/db_layout.py
@@ -0,0 +1,283 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+    db_layout.py
+    ~~~~~~~~~~~~
+
+    package database file layout
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
+    :license: GPL-2, see LICENSE for more details.
+"""
+
+import hashlib
+import os
+import shutil
+
+from .exceptions import DBLayoutError, DBStructureError, FileJSONError, IntegrityError
+from .fileutils import FileJSON, hash_file
+
+CATEGORIES_FILE_NAME = 'categories'
+MANIFEST_FILE_NAME = 'manifest'
+METADATA_FILE_NAME = 'metadata'
+PACKAGES_FILE_NAME = 'packages'
+
+JSON_FILE_SUFFIX = 'json'
+BSON_FILE_SUFFIX = 'bson'
+
+class CategoryJSON(FileJSON):
+    """
+    Category file in JSON format.
+    """
+    def __init__(self, directory, category):
+        super(CategoryJSON, self).__init__(os.path.join(os.path.abspath(directory), category),
+                                           file_name(PACKAGES_FILE_NAME, JSON_FILE_SUFFIX))
+
+
+SUPPORTED_FILE_FORMATS = {JSON_FILE_SUFFIX: CategoryJSON}
+
+
+# bson module is optional, we should check if it is installed
+try:
+    from g_sorcery.bson.bson import FileBSON
+
+    class CategoryBSON(FileBSON):
+        """
+        Category file in BSON format.
+        """
+        def __init__(self, directory, category):
+            super(CategoryBSON, self).__init__(os.path.join(os.path.abspath(directory), category),
+                                               file_name(PACKAGES_FILE_NAME, BSON_FILE_SUFFIX))
+
+    SUPPORTED_FILE_FORMATS[BSON_FILE_SUFFIX] = CategoryBSON
+
+except ImportError as e:
+    pass
+
+
+def file_name(name, suffix=JSON_FILE_SUFFIX):
+    """
+    Return file name based on name and suffix.
+    """
+    return name + '.' + suffix
+
+
+class Manifest(FileJSON):
+    """
+    Manifest file.
+    """
+
+    def __init__(self, directory):
+        super(Manifest, self).__init__(os.path.abspath(directory), file_name(MANIFEST_FILE_NAME))
+
+    def check(self):
+        """
+        Check manifest.
+        """
+        manifest = self.read()
+
+        result = True
+        errors = []
+
+        names = [file_name(CATEGORIES_FILE_NAME)]
+        for name in names:
+            if not name in manifest:
+                raise DBLayoutError('Bad manifest: no ' + name + ' entry')
+
+        for name, value in manifest.items():
+            if hash_file(os.path.join(self.directory, name), hashlib.md5()) != \
+                value:
+                errors.append(name)
+
+        if errors:
+            result = False
+
+        return (result, errors)
+
+    def digest(self, mandatory_files):
+        """
+        Generate manifest.
+        """
+        if not file_name(CATEGORIES_FILE_NAME) in mandatory_files:
+            raise DBLayoutError('Categories file: ' + file_name(CATEGORIES_FILE_NAME) \
+                                + ' is not in the list of mandatory files')
+
+        categories = Categories(self.directory)
+        categories = categories.read()
+
+        manifest = {}
+
+        for name in mandatory_files:
+            manifest[name] = hash_file(os.path.join(self.directory, name),
+                                       hashlib.md5())
+
+        for category in categories:
+            category_path = os.path.join(self.directory, category)
+            if not os.path.isdir(category_path):
+                raise DBStructureError('Empty category: ' + category)
+            for root, _, files in os.walk(category_path):
+                for f in files:
+                    manifest[os.path.join(root[len(self.directory)+1:], f)] = \
+                    hash_file(os.path.join(root, f), hashlib.md5())
+
+        self.write(manifest)
+
+
+class Metadata(FileJSON):
+    """
+    Metadata file.
+    """
+    def __init__(self, directory):
+        super(Metadata, self).__init__(os.path.abspath(directory),
+                                       file_name(METADATA_FILE_NAME),
+                                       ['db_version', 'layout_version', 'category_format'])
+
+    def read(self):
+        """
+        Read metadata file.
+
+        If file doesn't exist, we have a legacy DB
+        with DB layout v. 0. Fill metadata appropriately.
+        """
+        if not os.path.exists(self.directory):
+            os.makedirs(self.directory)
+        content = {}
+        if not os.path.isfile(self.path):
+            content = {'db_version': 0, 'layout_version': 0, 'category_format': JSON_FILE_SUFFIX}
+        else:
+            content = self.read_content()
+            for key in self.mandatories:
+                if not key in content:
+                    raise FileJSONError('lack of mandatory key: ' + key)
+
+        return content
+
+
+class Categories(FileJSON):
+    """
+    Categories file.
+    """
+    def __init__(self, directory):
+        super(Categories, self).__init__(os.path.abspath(directory),
+                                         file_name(CATEGORIES_FILE_NAME))
+
+
+def get_layout(metadata):
+    """
+    Get layout parameters based on metadata.
+    """
+    layout_version = metadata['layout_version']
+    if layout_version == 0:
+        return (CategoryJSON, [file_name(CATEGORIES_FILE_NAME)])
+    elif layout_version == 1:
+        category_format = metadata['category_format']
+        try:
+            category_cls = SUPPORTED_FILE_FORMATS[category_format]
+        except KeyError:
+            raise DBLayoutError("unsupported packages file format: " + category_format)
+        return (category_cls, [file_name(CATEGORIES_FILE_NAME), file_name(METADATA_FILE_NAME)])
+    else:
+        raise DBLayoutError("unsupported DB layout version: " + layout_version)
+
+
+class DBLayout(object):
+    """
+    Filesystem DB layout.
+
+    Directory layout.
+    ~~~~~~~~~~~~~~~~~
+
+    For legacy DB layout v. 0:
+
+    db dir
+        manifest.json: database manifest
+        categories.json: information about categories
+        category1
+            packages.json: information about available packages
+        category2
+        ...
+
+    For DB layout v. 1:
+
+    db dir
+        manifest.json: database manifest
+        categories.json: information about categories
+        metadata.json: DB metadata
+        category1
+            packages.[b|j]son: information about available packages
+        category2
+        ...
+
+    Packages file can be in json or bson formats.
+    """
+
+    def __init__(self, directory):
+        self.directory = os.path.abspath(directory)
+        self.manifest = Manifest(self.directory)
+
+    def check_manifest(self):
+        """
+        Check manifest.
+        """
+        sane, errors = self.manifest.check()
+        if not sane:
+            raise IntegrityError('Manifest error: ' + str(errors))
+
+    def clean(self):
+        """
+        Remove DB files.
+        """
+        if os.path.exists(self.directory):
+            shutil.rmtree(self.directory)
+
+    def read(self):
+        """
+        Read DB files.
+
+        Returns a tuple with metadata, list of categories
+        and categories dictionary.
+        """
+        self.check_manifest()
+
+        metadata_f = Metadata(self.directory)
+        metadata = metadata_f.read()
+
+        category_cls, _ = get_layout(metadata)
+
+        categories_f = Categories(self.directory)
+        categories = categories_f.read()
+
+        packages = {}
+        for category in categories:
+            category_path = os.path.join(self.directory, category)
+            if not os.path.isdir(category_path):
+                raise DBLayoutError('Empty category: ' + category)
+            category_f = category_cls(self.directory, category)
+            pkgs = category_f.read()
+            if not pkgs:
+                raise DBLayoutError('Empty category: ' + category)
+            packages[category] = pkgs
+
+        return (metadata, categories, packages)
+
+    def write(self, metadata, categories, packages):
+        """
+        Write DB files.
+        """
+        category_cls, mandatory_files = get_layout(metadata)
+
+        self.clean()
+
+        if file_name(METADATA_FILE_NAME) in mandatory_files:
+            metadata_f = Metadata(self.directory)
+            metadata_f.write(metadata)
+
+        categories_f = Categories(self.directory)
+        categories_f.write(categories)
+
+        for category in categories:
+            category_f = category_cls(self.directory, category)
+            category_f.write(packages[category])
+
+        self.manifest.digest(mandatory_files)

diff --git a/g_sorcery/exceptions.py b/g_sorcery/exceptions.py
index a8d7238..4691ce6 100644
--- a/g_sorcery/exceptions.py
+++ b/g_sorcery/exceptions.py
@@ -4,10 +4,10 @@
 """
     exceptions.py
     ~~~~~~~~~~~~~
-    
+
     Exceptions hierarchy
-    
-    :copyright: (c) 2013 by Jauhien Piatlicki
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
     :license: GPL-2, see LICENSE for more details.
 """
 
@@ -17,6 +17,9 @@ class GSorceryError(Exception):
 class DBError(GSorceryError):
     pass
 
+class DBLayoutError(GSorceryError):
+    pass
+
 class InvalidKeyError(DBError):
     pass
 
@@ -49,3 +52,6 @@ class DigestError(GSorceryError):
 
 class DownloadingError(GSorceryError):
     pass
+
+class SerializationError(GSorceryError):
+    pass

diff --git a/g_sorcery/fileutils.py b/g_sorcery/fileutils.py
index 443206c..d783c8a 100644
--- a/g_sorcery/fileutils.py
+++ b/g_sorcery/fileutils.py
@@ -4,10 +4,10 @@
 """
     fileutils.py
     ~~~~~~~~~~~~
-    
+
     file utilities
-    
-    :copyright: (c) 2013 by Jauhien Piatlicki
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
     :license: GPL-2, see LICENSE for more details.
 """
 
@@ -15,17 +15,15 @@ import glob
 import json
 import hashlib
 import os
-import shutil
 import tarfile
 
 from .compatibility import TemporaryDirectory
 from .exceptions import FileJSONError, DownloadingError
 from .serialization import JSONSerializer, deserializeHook
 
-class FileJSON(object):
+class FileJSONData(object):
     """
-    Class for JSON files. Supports custom JSON serialization
-    provided by g_sorcery.serialization.
+    Class for files with JSON compatible data.
     """
     def __init__(self, directory, name, mandatories=None):
         """
@@ -33,7 +31,6 @@ class FileJSON(object):
             directory: File directory.
             name: File name.
             mandatories: List of requiered keys.
-            loadconv: Type change values on loading.
         """
         self.directory = os.path.abspath(directory)
         self.name = name
@@ -45,7 +42,7 @@ class FileJSON(object):
 
     def read(self):
         """
-        Read JSON file.
+        Read file.
         """
         if not os.path.exists(self.directory):
             os.makedirs(self.directory)
@@ -53,27 +50,58 @@ class FileJSON(object):
         if not os.path.isfile(self.path):
             for key in self.mandatories:
                 content[key] = ""
-            with open(self.path, 'w') as f:
-                json.dump(content, f, indent=2,
-                    sort_keys=True, cls=JSONSerializer)
+            self.write_content(content)
         else:
-            with open(self.path, 'r') as f:
-                content = json.load(f, object_hook=deserializeHook)
+            content = self.read_content()
             for key in self.mandatories:
                 if not key in content:
                     raise FileJSONError('lack of mandatory key: ' + key)
-        
+
         return content
 
+    def read_content(self):
+        """
+        Real read operation with deserialization. Should be overridden.
+        """
+        return []
+
     def write(self, content):
         """
-        Write JSON file.
+        Write file.
         """
         for key in self.mandatories:
             if not key in content:
                 raise FileJSONError('lack of mandatory key: ' + key)
         if not os.path.exists(self.directory):
             os.makedirs(self.directory)
+        self.write_content(content)
+
+    def write_content(self, content):
+        """
+        Real write operation with serialization. Should be overridden.
+        """
+        pass
+
+
+class FileJSON(FileJSONData):
+    """
+    Class for JSON files. Supports custom JSON serialization
+    provided by g_sorcery.serialization.
+    """
+
+    def read_content(self):
+        """
+        Read JSON file.
+        """
+        content = {}
+        with open(self.path, 'r') as f:
+            content = json.load(f, object_hook=deserializeHook)
+        return content
+
+    def write_content(self, content):
+        """
+        Write JSON file.
+        """
         with open(self.path, 'w') as f:
             json.dump(content, f, indent=2, sort_keys=True, cls=JSONSerializer)
 
@@ -149,7 +177,7 @@ class ManifestEntry(object):
 
     __slots__ = ('directory', 'name', 'ftype',
                  'size', 'sha256', 'sha512', 'whirlpool')
-    
+
     def __init__(self, directory, name, ftype):
         self.directory = directory
         self.name = name

diff --git a/g_sorcery/package_db.py b/g_sorcery/package_db.py
index f19f9d4..5eeeb63 100644
--- a/g_sorcery/package_db.py
+++ b/g_sorcery/package_db.py
@@ -4,45 +4,46 @@
 """
     package_db.py
     ~~~~~~~~~~~~~
-    
+
     package database
-    
-    :copyright: (c) 2013 by Jauhien Piatlicki
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
     :license: GPL-2, see LICENSE for more details.
 """
 
 import glob
-import hashlib
 import os
-import shutil
-import sys
 
 import portage
 
 from .compatibility import basestring, py2k, TemporaryDirectory
 
-from .exceptions import DBStructureError, IntegrityError, \
-     InvalidKeyError, SyncError
-from .fileutils import FileJSON, hash_file, load_remote_file, copy_all, wget
+from .db_layout import DBLayout, JSON_FILE_SUFFIX
+from .exceptions import DBError, DBStructureError, InvalidKeyError, SyncError
+from .fileutils import FileJSON, load_remote_file, copy_all, wget
 from .g_collections import Package
-from .logger import Logger, ProgressBar
+from .logger import Logger
 
 
 class PackageDB(object):
     """
     Package database.
-    Database is a directory and related data structure.
-
-    Directory layout.
-    ~~~~~~~~~~~~~~~~~
-    db dir
-        manifest.json: database manifest
-        categories.json: information about categories
-        category1
-            packages.json: information about available packages
-        category2
-        ...
-    
+    It uses DBLayout class to manipulate files that
+    contain DB stored on disk.
+
+    There are two versions of DB layout now:
+        0 -- legacy version
+        1 -- new version that supports DB structure versioning
+
+    DB structure itself has two versions:
+        0 -- legacy version, categories contain dictionary package name: versions dict
+        1 -- actual version, corresponds to the DB in memory:
+            DB is a dictionary with categories as keys.
+            Each category contains a dictionary with two entries:
+                common_data -- fields common to all the packages
+                packages -- dictionary with packages (content of category dictionary in v. 0)
+
+    For DB layout v. 0 only DB structure v. 0 is possible.
     """
 
     class Iterator(object):
@@ -50,68 +51,113 @@ class PackageDB(object):
         Iterator class over the package database.
         """
         def __init__(self, package_db):
-            self.pkg_iter = iter(package_db.database.items())
+            self.cats_iter = iter(package_db.database.items())
             try:
-                self.pkgname, self.vers_dict = next(self.pkg_iter)
+                self.cat_name, self.cat_data = next(self.cats_iter)
             except StopIteration:
-                self.pkgname, self.vers_dict = None, None
-            if self.vers_dict:
-                self.vers_iter = iter(self.vers_dict.items())
-            else:
-                self.vers_iter = None
+                self.set_to_end()
+                return
+
+            if not self.cat_data:
+                self.set_to_end()
+                return
+
+            self.pkgs_iter = iter(self.cat_data['packages'].items())
+            try:
+                self.pkg_name, self.pkg_data = next(self.pkgs_iter)
+            except StopIteration:
+                self.set_to_end()
+                return
+
+            if not self.pkg_data:
+                self.set_to_end()
+                return
+
+            self.vers_iter = iter(self.pkg_data.items())
+
+        def set_to_end(self):
+            self.cat_name, self.cat_data = None, None
+            self.pkgs_iter = None
+            self.pkg_name, self.pkg_data = None, None
+            self.vers_iter = None
 
         def __iter__(self):
             return self
 
         if py2k:
             def next(self):
-                if not self.vers_iter:
+                if not self.vers_iter or not self.pkgs_iter:
                     raise StopIteration
+
                 ver, ebuild_data = None, None
                 while not ver:
                     try:
                         ver, ebuild_data = next(self.vers_iter)
                     except StopIteration:
                         ver, ebuild_data = None, None
-
                     if not ver:
-                        self.pkgname, self.vers_dict = next(self.pkg_iter)
-                        self.vers_iter = iter(self.vers_dict.items())
+                        try:
+                            self.pkg_name, self.pkg_data = next(self.pkgs_iter)
+                            self.vers_iter = iter(self.pkg_data.items())
+                        except StopIteration:
+                            self.cat_name, self.cat_data = next(self.cats_iter)
+                            self.pkgs_iter = iter(self.cat_data['packages'].items())
+                            self.pkg_name, self.pkg_data = next(self.pkgs_iter)
+                            self.vers_iter = iter(self.pkg_data.items())
+
+                ebuild_data.update(self.cat_data['common_data'])
+                return (Package(self.cat_name, self.pkg_name, ver), ebuild_data)
 
-                category, name = self.pkgname.split('/')
-                return (Package(category, name, ver), ebuild_data)
         else:
             def __next__(self):
-                if not self.vers_iter:
+                if not self.vers_iter or not self.pkgs_iter:
                     raise StopIteration
+
                 ver, ebuild_data = None, None
                 while not ver:
                     try:
                         ver, ebuild_data = next(self.vers_iter)
                     except StopIteration:
                         ver, ebuild_data = None, None
-
                     if not ver:
-                        self.pkgname, self.vers_dict = next(self.pkg_iter)
-                        self.vers_iter = iter(self.vers_dict.items())
+                        try:
+                            self.pkg_name, self.pkg_data = next(self.pkgs_iter)
+                            self.vers_iter = iter(self.pkg_data.items())
+                        except StopIteration:
+                            self.cat_name, self.cat_data = next(self.cats_iter)
+                            self.pkgs_iter = iter(self.cat_data['packages'].items())
+                            self.pkg_name, self.pkg_data = next(self.pkgs_iter)
+                            self.vers_iter = iter(self.pkg_data.items())
 
-                category, name = self.pkgname.split('/')
-                return (Package(category, name, ver), ebuild_data)
+                ebuild_data.update(self.cat_data['common_data'])
+                return (Package(self.cat_name, self.pkg_name, ver), ebuild_data)
 
 
-    def __init__(self, directory):
-        """
-        Args:
-            directory: database directory.
-        """
+    def __init__(self, directory,
+                 preferred_layout_version=1,
+                 preferred_db_version=1,
+                 preferred_category_format=JSON_FILE_SUFFIX):
+
+        if preferred_layout_version == 0 \
+           and preferred_db_version != 0:
+            raise DBStructureError("Wrong DB version: " + preferred_db_version + \
+                                   ", with DB layout version 0 it can be only 0")
+
+        if not preferred_db_version in [0, 1]:
+            raise DBStructureError("Unsupported DB version: " + preferred_db_version)
+
         self.logger = Logger()
-        self.CATEGORIES_NAME = 'categories.json'
-        self.PACKAGES_NAME = 'packages.json'
         self.directory = os.path.abspath(directory)
+        self.preferred_layout_version = preferred_layout_version
+        self.preferred_db_version = preferred_db_version
+        self.preferred_category_format = preferred_category_format
+        self.db_layout = DBLayout(self.directory)
         self.reset_db()
 
+
     def __iter__(self):
-        return(PackageDB.Iterator(self))
+        return PackageDB.Iterator(self)
+
 
     def reset_db(self):
         """
@@ -120,6 +166,7 @@ class PackageDB(object):
         self.database = {}
         self.categories = {}
 
+
     def sync(self, db_uri):
         """
         Synchronize local database with remote database.
@@ -131,7 +178,7 @@ class PackageDB(object):
         download_dir = TemporaryDirectory()
         if wget(real_db_uri, download_dir.name):
             raise SyncError('sync failed: ' + real_db_uri)
-        
+
         temp_dir = TemporaryDirectory()
         for f_name in glob.iglob(os.path.join(download_dir.name, '*.tar.gz')):
             self.logger.info("unpacking " + f_name)
@@ -140,19 +187,18 @@ class PackageDB(object):
         tempdb_dir = os.path.join(temp_dir.name, os.listdir(temp_dir.name)[0])
         tempdb = PackageDB(tempdb_dir)
 
-        if not tempdb.check_manifest()[0]:
-            raise IntegrityError('Manifest check failed.')
+        tempdb.db_layout.check_manifest()
 
         self.logger.info("copy files to an actual database")
         self.clean()
         copy_all(tempdb_dir, self.directory)
-        
-        if not self.check_manifest()[0]:
-            raise IntegrityError('Manifest check failed, db inconsistent.')
-                
+
+        self.db_layout.check_manifest()
+
         del download_dir
         del temp_dir
 
+
     def get_real_db_uri(self, db_uri):
         """
         Convert self.db_uri to URI where remote database can be
@@ -162,196 +208,61 @@ class PackageDB(object):
             URI of remote database file.
         """
         return db_uri
-            
-    def manifest(self):
-        """
-        Generate database manifest.
-        """
-        categories = FileJSON(self.directory, self.CATEGORIES_NAME, [])
-        categories = categories.read()
-        manifest = {}
-        names = [self.CATEGORIES_NAME]
-        for name in names:
-            manifest[name] = hash_file(os.path.join(self.directory, name),
-                                      hashlib.md5())
-        for category in categories:
-            category_path = os.path.join(self.directory, category)
-            if not os.path.isdir(category_path):
-                raise DBStructureError('Empty category: ' + category)
-            for root, dirs, files in os.walk(category_path):
-                for f in files:
-                    manifest[os.path.join(root[len(self.directory)+1:], f)] = \
-                    hash_file(os.path.join(root, f), hashlib.md5())
-        m_f = FileJSON(self.directory, 'manifest.json', [])
-        m_f.write(manifest)
-
-    def check_manifest(self):
-        """
-        Check database manifest.
 
-        Returns:
-            Tuple with first element containing result of manifest check
-            as boolean and second element containing list of files with errors.
-        """
-        self.logger.info("checking manifest")
-        m_f = FileJSON(self.directory, 'manifest.json', [])
-        manifest = m_f.read()
-
-        result = True
-        errors = []
-
-        names = [self.CATEGORIES_NAME]
-        for name in names:
-            if not name in manifest:
-                raise DBStructureError('Bad manifest: no ' + name + ' entry')
-
-        for name, value in manifest.items():
-            if hash_file(os.path.join(self.directory, name), hashlib.md5()) != \
-                value:
-                errors.append(name)
-
-        if errors:
-            result = False
-
-        return (result, errors)
 
     def clean(self):
         """
         Clean database.
         """
-        if os.path.exists(self.directory):
-            shutil.rmtree(self.directory)
+        self.db_layout.clean()
         self.reset_db()
-        self.write_and_manifest()
-
-    def write_and_manifest(self):
-        """
-        Write and digest database.
-        """
         self.write()
-        self.manifest()
+
 
     def write(self):
         """
-        Write database.
+        Write and digest database.
         """
-        categories_f = FileJSON(self.directory, self.CATEGORIES_NAME, [])
-        categories_f.write(self.categories)
-
         if self.database:
-            self.logger.info("writing database")
+            self.logger.info("writing database...")
+
+        metadata = {'db_version': self.preferred_db_version,
+                    'layout_version': self.preferred_layout_version,
+                    'category_format': self.preferred_category_format}
+
+        if self.preferred_db_version == 0:
+            packages = dict(self.database)
+            for category, cat_data in packages.items():
+                for _, versions in cat_data['packages'].items():
+                    for version, ebuild_data in versions.items():
+                        ebuild_data.update(cat_data['common_data'])
+                packages[category] = cat_data['packages']
+        else:
+            packages = dict(self.database)
 
-        progress_bar = ProgressBar(20, len(list(self.database)))
-        if self.database:
-            progress_bar.begin()
-
-        categories_content = {}
-        for category in self.categories:
-            categories_content[category] = {}
-        
-        for pkgname, versions in self.database.items():
-            category, name = pkgname.split('/')
-            if not category or (not category in self.categories):
-                raise DBStructureError('Non existent: ' + category)
-            categories_content[category][name] = {}
-            for version, content in versions.items():
-                categories_content[category][name][version] = content
-                self.additional_write_version(category, name, version)
-            self.additional_write_package(category, name)
-            progress_bar.increment()
-
-        for category in self.categories:
-            f = FileJSON(os.path.join(self.directory, category), self.PACKAGES_NAME, [])
-            f.write(categories_content[category])
-            self.additional_write_category(category)
-
-        self.additional_write()
+        self.db_layout.write(metadata, self.categories, packages)
 
         if self.database:
-            progress_bar.end()
-            print("")
-
-    def additional_write_version(self, category, package, version):
-        """
-        Hook to be overrided.
-        """
-        pass
-
-    def additional_write_package(self, category, package):
-        """
-        Hook to be overrided.
-        """
-        pass
-
-    def additional_write_category(self, category):
-        """
-        Hook to be overrided.
-        """
-        pass
+            self.logger.info("database written")
 
-    def additional_write(self):
-        """
-        Hook to be overrided.
-        """
-        pass
 
     def read(self):
         """
         Read database.
         """
-        sane, errors = self.check_manifest()
-        if not sane:
-            raise IntegrityError('Manifest error: ' + str(errors))
-        categories_f = FileJSON(self.directory, self.CATEGORIES_NAME, [])
-        self.categories = categories_f.read()
-        for category in self.categories:
-            category_path = os.path.join(self.directory, category)
-            if not os.path.isdir(category_path):
-                raise DBStructureError('Empty category: ' + category)
-            
-            f = FileJSON(category_path, self.PACKAGES_NAME, [])
-            packages = f.read()
-            if not packages:
-                raise DBStructureError('Empty category: ' + category)
-            
-            for name, versions in packages.items():
-                
-                if not versions:
-                    error_msg = 'Empty package: ' + category + '/' + name
-                    raise DBStructureError(error_msg)
-                
-                pkgname = category + '/' + name
-                self.database[pkgname] = versions
-                for version in versions:
-                    self.additional_read_version(category, name, version)
-                self.additional_read_package(category, name)
-            self.additional_read_category(category)
-        self.additional_read()
+        metadata, self.categories, packages = self.db_layout.read()
 
-    def additional_read_version(self, category, package, version):
-        """
-        Hook to be overrided.
-        """
-        pass
+        db_version = metadata['db_version']
+        self.database = packages
+        if db_version == 0:
+            for category, cat_data in self.database.items():
+                self.database[category] = {'common_data': {}, 'packages': cat_data}
+        elif db_version == 1:
+            pass
+        else:
+            raise DBStructureError("Unsupported DB version: " + db_version)
 
-    def additional_read_package(self, category, package):
-        """
-        Hook to be overrided.
-        """
-        pass
 
-    def additional_read_category(self, category):
-        """
-        Hook to be overrided.
-        """
-        pass
-
-    def additional_read(self):
-        """
-        Hook to be overrided.
-        """
-        pass
-        
     def add_category(self, category, description=None):
         """
         Add a category.
@@ -364,6 +275,7 @@ class PackageDB(object):
             description = {}
         self.categories[category] = description
 
+
     def add_package(self, package, ebuild_data=None):
         """
         Add a package.
@@ -374,15 +286,25 @@ class PackageDB(object):
         """
         if not ebuild_data:
             ebuild_data = {}
+
         category = package.category
         name = package.name
         version = package.version
-        pkgname = category + '/' + name
-        if category and not category in self.categories:
+
+        if not category or not name or not version:
+            raise DBError("wrong package: " + str(package))
+
+        if not category in self.categories:
             raise InvalidKeyError('Non-existent category: ' + category)
-        if pkgname and not pkgname in self.database:
-            self.database[pkgname] = {}
-        self.database[pkgname][version] = ebuild_data
+
+        if not category in self.database:
+            self.database[category] = {'common_data': {}, 'packages': {}}
+
+        if not name in self.database[category]['packages']:
+            self.database[category]['packages'][name] = {}
+
+        self.database[category]['packages'][name][version] = ebuild_data
+
 
     def list_categories(self):
         """
@@ -393,6 +315,7 @@ class PackageDB(object):
         """
         return list(self.categories)
 
+
     def in_category(self, category, name):
         """
         Tests whether a package is in a given category.
@@ -406,7 +329,12 @@ class PackageDB(object):
         """
         if not category or (not category in self.categories):
             raise InvalidKeyError('No such category: ' + category)
-        return (category + '/' + name) in self.database
+
+        if not category in self.database:
+            return False
+
+        return name in self.database[category]['packages']
+
 
     def list_package_names(self, category):
         """
@@ -420,9 +348,12 @@ class PackageDB(object):
         """
         if not category or (not category in self.categories):
             raise InvalidKeyError('No such category: ' + category)
-        res = [x.split('/')[1] for x in self.database
-               if x.split('/')[0] == category]
-        return res
+
+        if not category in self.database:
+            return []
+
+        return list(self.database[category]['packages'])
+
 
     def list_catpkg_names(self):
         """
@@ -431,7 +362,12 @@ class PackageDB(object):
         Returns:
             List with category/package entries.
         """
-        return list(self.database)
+        result = []
+        for category, cat_data in self.database.items():
+            for name in cat_data['packages']:
+                result.append(category + '/' + name)
+        return result
+
 
     def list_package_versions(self, category, name):
         """
@@ -446,10 +382,13 @@ class PackageDB(object):
         """
         if not category or (not category in self.categories):
             raise InvalidKeyError('No such category: ' + category)
-        pkgname = category + '/' + name
-        if not pkgname in self.database:
-            raise InvalidKeyError('No such package: ' + pkgname)
-        return list(self.database[pkgname])
+
+        if not category in self.database \
+           or not name in self.database[category]['packages']:
+            raise InvalidKeyError('No such package: ' + category + '/' + name)
+
+        return list(self.database[category]['packages'][name])
+
 
     def list_all_packages(self):
         """
@@ -459,12 +398,13 @@ class PackageDB(object):
             List of package_db.Package instances.
         """
         result = []
-        for pkgname, versions in self.database.items():
-            for version in versions:
-                category, name = pkgname.split('/')
-                result.append(Package(category, name, version))
+        for category, cat_data in self.database.items():
+            for name, versions in cat_data['packages'].items():
+                for version in versions:
+                    result.append(Package(category, name, version))
         return result
 
+
     def get_package_description(self, package):
         """
         Get package ebuild data.
@@ -476,8 +416,11 @@ class PackageDB(object):
             Dictionary with package ebuild data.
         """
         #a possible exception should be catched in the caller
-        return self.database[package.category \
-                             + '/' + package.name][package.version]
+        desc = dict(self.database[package.category]['packages']\
+                    [package.name][package.version])
+        desc.update(self.database[package.category]['common_data'])
+        return desc
+
 
     def get_max_version(self, category, name):
         """
@@ -490,10 +433,15 @@ class PackageDB(object):
         Returns:
             The recent version of a package.
         """
+        if not category or (not category in self.categories):
+            raise InvalidKeyError('No such category: ' + category)
+
+        if not category in self.database \
+           or not name in self.database[category]['packages']:
+            raise InvalidKeyError('No such package: ' + category + '/' + name)
+
         pkgname = category + '/' + name
-        if not pkgname in self.database:
-            raise InvalidKeyError('No such package: ' + pkgname)
-        versions = list(self.database[pkgname])
+        versions = list(self.database[category]['packages'][name])
         max_ver = versions[0]
         for version in versions[1:]:
             if portage.pkgcmp(portage.pkgsplit(pkgname + '-' + version),
@@ -510,8 +458,15 @@ class DBGenerator(object):
 
     __slots__ = ('package_db_class')
 
-    def __init__(self, package_db_class=PackageDB):
+    def __init__(self, package_db_class=PackageDB,
+                 preferred_layout_version=1,
+                 preferred_db_version=1,
+                 preferred_category_format=JSON_FILE_SUFFIX):
         self.package_db_class = package_db_class
+        self.preferred_layout_version = preferred_layout_version
+        self.preferred_db_version = preferred_db_version
+        self.preferred_category_format = preferred_category_format
+
 
     def __call__(self, directory, repository,
                  common_config=None, config=None, generate=True):
@@ -539,7 +494,10 @@ class DBGenerator(object):
             Package database.
         """
         db_path = os.path.join(directory, repository, "db")
-        pkg_db = self.package_db_class(db_path)
+        pkg_db = self.package_db_class(db_path,
+                                       preferred_layout_version=self.preferred_layout_version,
+                                       preferred_db_version=self.preferred_db_version,
+                                       preferred_category_format=self.preferred_category_format)
 
         config_f = FileJSON(os.path.join(directory, repository),
                             "config.json", [])
@@ -557,9 +515,10 @@ class DBGenerator(object):
         if generate:
             pkg_db.clean()
             self.generate_tree(pkg_db, common_config, config)
-            pkg_db.write_and_manifest()
+            pkg_db.write()
         return pkg_db
 
+
     def generate_tree(self, pkg_db, common_config, config):
         """
         Generate package entries.
@@ -739,7 +698,7 @@ class DBGenerator(object):
         Hook to convert external dependencies.
         """
         return dependency
-        
+
     def in_config(self, configs, list_name, value):
         """
         Check whether value is in config.

diff --git a/g_sorcery/serialization.py b/g_sorcery/serialization.py
index e051596..780de6f 100644
--- a/g_sorcery/serialization.py
+++ b/g_sorcery/serialization.py
@@ -4,38 +4,100 @@
 """
     serialization.py
     ~~~~~~~~~~~~~~~~
-    
+
     json serialization
-    
-    :copyright: (c) 2013 by Jauhien Piatlicki
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
     :license: GPL-2, see LICENSE for more details.
 """
 
 import json
 import importlib
 
+from .exceptions import SerializationError
+
+def step_to_raw_serializable(obj):
+    """
+    Make one step of convertion of object
+    to the type that is serializable
+    by the json library.
+
+    None return value signifies an error.
+    """
+    if hasattr(obj, "serialize"):
+        if hasattr(obj, "deserialize"):
+            module = obj.__class__.__module__
+            name = obj.__class__.__name__
+            value = obj.serialize()
+            return {"python_module" : module,
+                    "python_class" : name,
+                    "value" : value}
+        else:
+            return obj.serialize()
+    return None
+
+
+def to_raw_serializable(obj):
+    """
+    Convert object to the raw serializable type.
+    Logic is the same as in the standard json encoder.
+    """
+    if isinstance(obj, str) \
+       or obj is None \
+       or obj is True \
+       or obj is False \
+       or isinstance(obj, int) \
+       or isinstance(obj, float) \
+       or isinstance(obj, (list, tuple)) \
+       or isinstance(obj, dict):
+        return obj
+    else:
+        sobj = step_to_raw_serializable(obj)
+        if not sobj:
+            raise SerializationError('Non serializable object: ', sobj)
+        return to_raw_serializable(sobj)
+
+
+def step_from_raw_serializable(sobj):
+    """
+    Make one step of building of object from the
+    raw json serializable type.
+    """
+    if "python_class" in sobj:
+        module = importlib.import_module(sobj["python_module"])
+        cls = getattr(module, sobj["python_class"])
+        return cls.deserialize(sobj["value"])
+    return sobj
+
+
+def from_raw_serializable(sobj):
+    """
+    Build object from the raw serializable object.
+    """
+    if isinstance(sobj, dict):
+        res = {k: from_raw_serializable(v) for k, v in sobj.items()}
+        return step_from_raw_serializable(res)
+    elif isinstance(sobj, list):
+        return [from_raw_serializable(item) for item in sobj]
+    else:
+        return sobj
+
 
 class JSONSerializer(json.JSONEncoder):
     """
     Custom JSON encoder.
 
     Each serializable class should have a method serialize
-    that returns JSON serializable value. If class addfitionally
+    that returns JSON serializable value. If class additionally
     has a classmethod deserialize that it can be deserialized
     and additional metainformation is added to the resulting JSON.
     """
     def default(self, obj):
-        if hasattr(obj, "serialize"):
-            if hasattr(obj, "deserialize"):
-                module = obj.__class__.__module__
-                name = obj.__class__.__name__
-                value = obj.serialize()
-                return {"python_module" : module,
-                        "python_class" : name,
-                        "value" : value}
-            else:
-                return obj.serialize()
-        return json.JSONEncoder.default(self, obj)
+        res = step_to_raw_serializable(obj)
+        if res:
+            return res
+        else:
+            return json.JSONEncoder.default(self, obj)
 
 
 def deserializeHook(json_object):
@@ -46,8 +108,4 @@ def deserializeHook(json_object):
     that takes value (previously returned by serialize method) and transforms
     it into class instance.
     """
-    if "python_class" in json_object:
-        module = importlib.import_module(json_object["python_module"])
-        cls = getattr(module, json_object["python_class"])
-        return cls.deserialize(json_object["value"])
-    return json_object
+    return step_from_raw_serializable(json_object)

diff --git a/gs_db_tool/gs_db_tool.py b/gs_db_tool/gs_db_tool.py
index f949eae..d9087b8 100644
--- a/gs_db_tool/gs_db_tool.py
+++ b/gs_db_tool/gs_db_tool.py
@@ -4,10 +4,10 @@
 """
     gs_db_tool.py
     ~~~~~~~~~~~~~
-    
-    CLI to manipulate with package DB
-    
-    :copyright: (c) 2013 by Jauhien Piatlicki
+
+    CLI to manipulate package DB
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
     :license: GPL-2, see LICENSE for more details.
 """
 
@@ -61,7 +61,7 @@ def transform_db(function):
     def transformator(pkg_db, args):
         pkg_db.read()
         function(pkg_db, args)
-        pkg_db.write_and_manifest()
+        pkg_db.write()
     return transformator
 
 

diff --git a/scripts/all_pythons.sh b/scripts/all_pythons.sh
index 9160dd0..af4c1f1 100755
--- a/scripts/all_pythons.sh
+++ b/scripts/all_pythons.sh
@@ -2,7 +2,7 @@
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-for VER in 2.7 3.2 3.3
+for VER in 2.7 3.3 3.4
 do
     echo
     echo "testing python${VER}"

diff --git a/setup.py b/setup.py
index 015ec76..866a38f 100644
--- a/setup.py
+++ b/setup.py
@@ -1,15 +1,38 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+    setup.py
+    ~~~~~~~~
+
+    installation script
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
+    :license: GPL-2, see LICENSE for more details.
+"""
+
+import os
 
 from distutils.core import setup
 
+SELECTABLE = ['bson']
+
+use_defaults = ' '.join(list(SELECTABLE))
+USE = os.environ.get("USE", use_defaults).split()
+
+optional_modules = []
+for mod in SELECTABLE:
+    if mod in USE:
+        optional_modules.append('g_sorcery.%s' % mod)
+
 setup(name          = 'g-sorcery',
-      version       = '0.1',
+      version       = '0.2',
       description   = 'framework for automated ebuild generators',
       author        = 'Jauhien Piatlicki',
       author_email  = '[email protected]',
-      packages      = ['g_sorcery', 'gs_db_tool'],
+      packages      = ['g_sorcery', 'gs_db_tool'] + optional_modules,
       package_data  = {'g_sorcery': ['data/*']},
       scripts       = ['bin/g-sorcery', 'bin/gs-db-tool'],
       data_files    = [('/etc/g-sorcery/', ['g-sorcery.cfg'])],
-      license       = 'GPL',
+      license       = 'GPL-2',
       )

diff --git a/tests/test_PackageDB.py b/tests/test_PackageDB.py
index 373fa4b..f73f006 100644
--- a/tests/test_PackageDB.py
+++ b/tests/test_PackageDB.py
@@ -4,10 +4,10 @@
 """
     test_PackageDB.py
     ~~~~~~~~~~~~~~~~
-    
+
     PackageDB test suite
-    
-    :copyright: (c) 2013 by Jauhien Piatlicki
+
+    :copyright: (c) 2013-2015 by Jauhien Piatlicki
     :license: GPL-2, see LICENSE for more details.
 """
 
@@ -44,7 +44,7 @@ class TestPackageDB(BaseTest):
         for package in packages:
             orig_db.add_package(package, ebuild_data)
 
-        orig_db.write_and_manifest()
+        orig_db.write()
         os.system("cd " + orig_tempdir.name + " && tar cvzf good.tar.gz db")
         os.system("echo invalid >> " + orig_tempdir.name + "/db/app-test1/packages.json")
         os.system("cd " + orig_tempdir.name + " && tar cvzf dummy.tar.gz db")
@@ -82,7 +82,7 @@ class TestPackageDB(BaseTest):
             self.assertEqual(data, ebuild_data)
             pkg_set.remove(package)
         self.assertTrue(not pkg_set)
-        
+
 
 def suite():
     suite = unittest.TestSuite()

Reply via email to