On Thu, Feb 02, 2012 at 01:11:24PM +0100, Manel Gimeno Zaragozá wrote:

What version?

> Everything is working fine between Master & Replica until Replica goes down 
> (maintenance or whatever). Once it is down, there are still connections and 
> modifications on the master. If I bring Replica back up, the changes made 
> during the "blackout" are not fully transmitted to it. Some of them are 
> transmitted, but not all.
> 
> Which is the best way to resynchronize the missing parts to Replica? 

sync_client -r -f $file for each file in the conf/sync/ directory.

> I was wondering about rsyncing the imap folders (/var/spool/imap & 
> /var/lib/imap), and it works fine in my test environment, but in production 
> it could be GBs to synchronize and I think this is not optimal.

No, that's awful.  Don't do that.

> Another option was "sync_client -l -u <all_user>" but I'm guessing that in 
> production I will have the same problem due to the amount of data to 
> synchronize.

It's not too bad.  We run it occasionally if we suspect things have
got confused for other reasons.

> On the other hand, I also have doubts about how "sync_client -r" and 
> "sync_server" are started. On my test machines I have to run them manually 
> each time I start cyrus-imapd. I thought it would happen automatically once 
> I start the cyrus-imapd service, but it does not.

I have attached the script that we run.  It has lots of hooks into
our own systems of course.

I would LIKE to make something more generic that's part of Cyrus
itself, rather than having external tooling.  Of course, our external
tooling is a bit special-cased as well.  But at least having a basic
"keep trying to replicate all the records" would be good.

Bron.
#!/usr/bin/perl -w

BEGIN { do "/home/mod_perl/hm/ME/FindLibs.pm"; }

use strict;
use warnings;

use ME::FMVars;
use ME::ImapStore;
use ME::ImapSlot;
use ME::Machine;
use ME::Notify qw(NotifyAdmins);
use IO::LockedFile;
use IO::File;
use Getopt::Std;
use Data::Dumper;

# Command-line switches, all optional:
#   -v       print progress messages
#   -n       send admin notifications about leftover logs / restarted clients
#   -C       actually remove sync directories of channels that are not configured
#   -a       examine every slot, not just masters
#   -f       run even if /etc/nomonitorsync exists
#   -r ROLE  only sync to replicas whose machine has this role
#   -l N     one-off rate limit, stored in $OneoffLimit
my %Opts;
getopts('Cvnl:r:fa', \%Opts);

my $MachineName = ME::Machine->Name();

# Default rate limit handed to RunCommand; -l overrides it for $OneoffLimit.
my $RateLimit = 5000;
my $OneoffLimit = $Opts{l} ? $Opts{l} : $RateLimit;

# Touching /etc/nomonitorsync is an easy way to suppress cron runs;
# -f overrides the suppression.
exit 0 if not $Opts{f} and -e "/etc/nomonitorsync";

# Main loop: for every IMAP slot on this machine that is a master (or every
# slot when -a is given) and has at least one other slot to replicate to:
#   1. work out which sync channels are configured and where their logs live,
#   2. report (and with -C remove) sync directories of unknown channels,
#   3. replay leftover sync log files with one-off sync_client runs,
#   4. start a rolling sync_client if none is running for the channel.
foreach my $Slot (ME::Machine->ImapSlots()) {
  my $SlotName = $Slot->Name();

  unless ($Opts{a} or $Slot->IsMaster()) {
    print "$SlotName: not master\n" if $Opts{v};
    next;
  }

  my @OtherSlots = $Slot->OtherSlots();
  unless (@OtherSlots) {
    print "$SlotName: not replicated\n" if $Opts{v};
    next;
  }

  my $ConfDir = $Slot->CyrusConfigPath();

  # channel name => sync log directory.  With sync_log_channels each channel
  # has its own subdirectory; with a plain sync_host there is a single
  # unnamed channel ('') logging straight into sync/.
  my %dirs;
  my $ImapdConf = $Slot->ImapdConf();
  if ($ImapdConf->{sync_log_channels}) {
    foreach my $channel (split / /, $ImapdConf->{sync_log_channels}) {
      $dirs{$channel} = "$ConfDir/sync/$channel";
    }
    # Anything configured that is not one of our other slots is suspicious.
    my %channels = map { $_ => 1 } split / /, $ImapdConf->{sync_log_channels};
    foreach my $osl (map { $_->Name() } @OtherSlots) {
      delete $channels{$osl};
    }
    # NOTE(review): 'oslotest' is always tolerated - presumably a
    # site-specific test channel; confirm.
    delete $channels{oslotest};
    if (keys %channels) {
        warn "$SlotName: extra channels in imapd configuration file - "
             .join(" ", keys %channels) . "\n";
    }
  } elsif ($ImapdConf->{sync_host}) {
     $dirs{''} = "$ConfDir/sync";
  }

  # Look for subdirectories of sync/ that do not belong to a known channel.
  # The listing is read up front and the handle closed, so removing entries
  # below cannot disturb the directory scan.
  if (opendir(my $SyncDH, "$ConfDir/sync/")) {
    my @entries = readdir($SyncDH);
    closedir($SyncDH);
    foreach my $item (@entries) {
      next if $item =~ m/^\./;
      next unless -d "$ConfDir/sync/$item";
      next if $dirs{$item}; # channel exists
      warn "$SlotName: extra channel $item\n";
      next unless $Opts{C};
      warn "rm -rf $ConfDir/sync/$item\n";
      # List form: the path is passed as a single argument, no shell involved.
      system('rm', '-rf', "$ConfDir/sync/$item") == 0
        or warn "$SlotName: rm -rf $ConfDir/sync/$item failed (status $?)\n";
    }
  }
  elsif ($Opts{v}) {
    print "$SlotName: cannot open $ConfDir/sync/: $!\n";
  }

  foreach my $channel (sort keys %dirs) {
    print "Doing slot $SlotName/$channel\n" if $Opts{v};
    my $dir = $dirs{$channel};
    next unless -d $dir; # no logs!
    if (-f "$dir/shutdown") {
      print "$SlotName/$channel: shutdown file exists, skipping\n";
      next;
    }
    my $OtherSlot;
    if ($channel) {
      # A failure to construct the slot leaves $OtherSlot undefined and the
      # channel is skipped below.
      $OtherSlot = eval { ME::ImapSlot->new($channel) };
    } else {
      # There is only one replica in this case
      $OtherSlot = $OtherSlots[0];
    }
    next unless $OtherSlot;
    unless ($OtherSlot->IsRunning()) {
      print "$SlotName/$channel: ignoring, replica is down\n" if $Opts{v};
      next;
    }
    if ($Opts{r} and not $OtherSlot->Machine->HasRole($Opts{r})) {
      print "$SlotName/$channel: not syncing, doesn't have role $Opts{r}\n";
      next;
    }

    # Extra sync_client arguments selecting the channel, if it is named.
    my @rest;
    @rest = (-n => $channel) if $channel;

    # Non-blocking lock so that two monitor runs never work on the same
    # channel; $Lock stays in scope until the end of this iteration.
    my $Lock = IO::LockedFile->new({block => 0}, ">$dir/monitorsync.lock");
    unless ($Lock) {
      print "$SlotName/$channel: already locked, skipping\n" if $Opts{v};
      next;
    }
    my @pids = get_pids($SlotName, $channel);
    my @ran;

    # Snapshot the directory listing before touching it: files are renamed
    # and unlinked below, and what readdir returns for entries changed during
    # a scan is unspecified.
    next unless opendir(my $LogDH, $dir);
    my @items = readdir($LogDH);
    closedir($LogDH);

    foreach my $item (@items) {
      # With no rolling sync_client alive nobody is consuming the live log,
      # so move it aside under our own pid and replay it like a leftover.
      if ($item eq 'log' and not @pids) {
        $item = "slog-$$";
        print ("renaming log to $item\n");
        unless (rename("$dir/log", "$dir/$item")) {
          warn "$SlotName/$channel: failed to rename log to $item: $!\n";
          next;
        }
      }
      next unless $item =~ m/^(?:s\d*)?log-(\d+)$/;
      my $pid = $1 || '';

      # check if pid exists, and whether it is a sync_client for this slot
      if ($pid and my $fh = IO::File->new("/proc/$pid/cmdline", "r")) {
        local $/;
        my $cmdline = <$fh>;
        $fh->close();
        # \Q..\E: the slot name is literal text, not a pattern.
        if ($cmdline =~ m/sync_client.*\Q$SlotName\E/s) {
          print "$SlotName/$channel: Skipping log file log-$pid, process is running\n" if $Opts{v};
          next;
        }
      }

      print "$SlotName/$channel: Syncing file $item\n" if $Opts{v};

      # returns an empty string on success
      # NOTE(review): uses $OneoffLimit so that -l takes effect for these
      # one-off runs; without -l it equals $RateLimit, as before.
      my $res = $Slot->RunCommand({RateLimit => $OneoffLimit},
                                  'sync_client', '-o', '-r',
                                  '-f' => "$dir/$item", @rest);

      # failure
      if ($? or $res =~ m/\S/) {
        print "$SlotName: Failed $channel $item, notifying ($res, $?)\n" if $Opts{v};
        NotifyAdmins('email', "$MachineName/$SlotName/$channel sync_client failed on $item", "$res (error: $?)");
      }

      # success :)
      else {
        print "$SlotName/$channel: Done $item, deleting\n" if $Opts{v};
        push @ran, $item;
        unlink("$dir/$item")
          or warn "$SlotName/$channel: failed to delete $item: $!\n";
      }
    }

    if (@ran) {
      my $num = @ran;
      NotifyAdmins('email', "$MachineName/$SlotName/$channel sync_client ran $num leftover logs", join("\n", @ran)) if $Opts{n};
    }
    unless (@pids) {
      # start a new one
      print "$SlotName: Starting a new sync_client, old one gone away\n" if $Opts{v};
      NotifyAdmins('email', "$MachineName/$SlotName/$channel sync_client missing, starting a new one") if $Opts{n};
      $Slot->RunCommand({Daemon => 1, RateLimit => $RateLimit},
                        'sync_client', '-r', '-o', '-v', @rest);
    }
  }

}

print "Finished\n" if $Opts{v};

# get_pids($SlotName, $Channel)
#
# Scans the output of `ps axww` for rolling sync_client processes that belong
# to the given slot and, when $Channel is non-empty, to that channel.
#
# A process line is accepted when it:
#   - mentions sync_client,
#   - has no " -f" argument (those are one-off runs on a specific file),
#   - has a " -r" argument (rolling mode),
#   - mentions "imapd-<SlotName>",
#   - and, if $Channel is true, has "-n <Channel>" as a complete argument.
#
# Returns the list of matching pids; the list is empty when nothing matches
# or when ps cannot be run.
sub get_pids {
  my $SlotName = shift;
  my $Channel = shift;
  my @res;

  # List-form pipe open runs ps directly, without a shell.
  open(my $FH, '-|', 'ps', 'axww') or return @res;

  # Read into a lexical so the caller's $_ is left alone.
  while (my $line = <$FH>) {
    next unless $line =~ m/sync_client/;
    next if $line =~ m/ -f/; # specific file, we don't want that
    next unless $line =~ m/ -r/; # needs to be rolling
    # \Q..\E: names are literal text, not regex fragments.
    next unless $line =~ m/imapd-\Q$SlotName\E/;
    # The channel must be the whole argument, so that channel "slot1" does
    # not also match a process started with "-n slot10".
    next if $Channel and $line !~ m/-n \Q$Channel\E(?!\S)/;
    next unless $line =~ m/^\s*(\d+)/;
    push @res, $1;
  }
  close($FH);

  return @res;
}
----
Cyrus Home Page: http://www.cyrusimap.org/
List Archives/Info: http://lists.andrew.cmu.edu/pipermail/info-cyrus/

Reply via email to