[CentOS] DRBD on a xen host: crash on high I/O

Tue Jul 28 18:11:38 UTC 2009
Andrea Dell'Amico <adellam at sevenseas.org>

Hello,
I have a couple of Dell 2950 III, both of them with CentOS 5.3, Xen,
drbd 8.2 and cluster suite.
Hardware: 32GB RAM, RAID 5 with 6 SAS disks (one hot spare) on a PERC/6
controller.

I configured DRBD to use the main network interfaces (bnx2 driver), with
bonding and crossover cables to have a direct link.
The normal network traffic uses two different network cards.
There are two DRBD resources for a total of a little less than 1TB.

When the two hosts are in sync, if I activate more than a few (six or
seven) xen guests, the master server crashes spectacularly and reboots.

I've seen a kernel dump over the serial console, but the machine
restarts immediately so I didn't write it down.

Unfortunately I cannot experiment because I have production services on
those machines (and they are working fine until I start drbd on the
slave).

drbd configuration is attached.

Does anybody have an idea of the problem? The crash is perfectly reproducible,
and drbd seems to be the problem (maybe the Xen kernel helps?).

Thanks in advance,
Andrea
-- 
Question: “Is there a God?”
Answer: “No.”
From the Official God FAQ, http://www.400monkeys.com/God/

-------------- next part --------------
#
# At most ONE global section is allowed.
# It must precede any resource section.
#
global {
    # minor-count 64;
    # dialog-refresh 5; # 5 seconds
    # disable-ip-verification;
    # Do not participate in LINBIT's online usage counter (no phone-home).
    usage-count no;
}


common {
  # Defaults inherited by every resource section below.
  # Resync rate cap: 50M = 50 MByte/s (unit is bytes, not bits).
  # NOTE(review): both resources below repeat this same value in their own
  # syncer sections, so this default is effectively redundant.
  syncer { rate 50M; }
}

#
# this need not be r#, you may use phony resource names,
# like "resource web" or "resource mail", too
#

# Resource "virtual1": replicated volume /dev/drbd1 between server-1 and
# server-2 over the dedicated 192.168.2.0 link, port 7788.
resource virtual1 {

  # Protocol C: fully synchronous replication — writes complete only after
  # the peer confirms them on stable storage.
  protocol C;

  handlers {
    # All three handlers below power the node off immediately
    # ("echo o > /proc/sysrq-trigger") on unrecoverable conditions:
    # primary with inconsistent degraded data, primary that lost the
    # split-brain arbitration, or a local disk I/O error.
    pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
    pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
    local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
    # NOTE(review): "fencing resource-and-stonith" is set in the disk section
    # below, but the peer-outdating handler here is commented out. Per the
    # DRBD documentation that fencing policy freezes I/O until a fence-peer
    # handler runs — confirm this is intentional.
    #outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
    #pri-lost "echo pri-lost. Have a look at the log files. | mail -s 'DRBD Alert' root";
    #split-brain "echo split-brain. drbdadm -- --discard-my-data connect $DRBD_RESOURCE ? | mail -s 'DRBD Alert' root";
    #out-of-sync "echo out-of-sync. drbdadm down $DRBD_RESOURCE. drbdadm ::::0 set-gi $DRBD_RESOURCE. drbdadm up $DRBD_RESOURCE. | mail -s 'DRBD Alert' root";
    #before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
    #after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
  }

  startup {
    # Wait up to 120 s for the peer on boot (cluster healthy / degraded).
    wfc-timeout  120;
    degr-wfc-timeout 120;    # 2 minutes.
    # wait-after-sb;
  }

  disk {
    # pass_on: hand lower-level I/O errors up to the mounted filesystem.
    on-io-error   pass_on;
    # Freeze I/O and expect the peer to be fenced/outdated on connection loss.
    # NOTE(review): requires a fence-peer handler (commented out above).
    fencing resource-and-stonith;
    # ONLY USE THIS OPTION IF YOU KNOW WHAT YOU ARE DOING.
    # no-disk-flushes;
    # no-md-flushes;
    # Limit bios to one biovec entry — presumably a workaround for bio
    # merging issues under the Xen kernel; verify it is still needed.
    max-bio-bvecs 1;
  }

  net {
    # max-buffers     2048;
    # unplug-watermark   128;
    # max-epoch-size  2048;
    # ko-count 4;
    # Peer authentication via HMAC-SHA1 challenge/response.
    cram-hmac-alg "sha1";
    # NOTE(review): "bah" is a weak shared secret — consider a stronger one.
    shared-secret "bah";
    # Automatic split-brain recovery policies (0/1/2 primaries after SB).
    after-sb-0pri discard-younger-primary;
    after-sb-1pri call-pri-lost-after-sb;
    after-sb-2pri call-pri-lost-after-sb;
    rr-conflict disconnect;
    # data-integrity-alg "md5";
  }

  syncer {
    # Resync rate cap (same as the common-section default).
    rate 50M;
    #after "r2";
    # Activity-log extents; odd/prime values are conventional for hashing.
    al-extents 257;
    # cpu-mask 15;
  }

  # Per-host sections: same drbd minor and port on both nodes, different
  # backing partitions; metadata stored inside the backing device.
  on server-1 {
    device     /dev/drbd1;
    disk       /dev/sda2;
    address    192.168.2.1:7788;
    meta-disk  internal;
  }

  on server-2 {
    device    /dev/drbd1;
    disk      /dev/sda4;
    address   192.168.2.2:7788;
    meta-disk internal;
  }
}

# Resource "servizi0": replicated volume /dev/drbd0 between server-1 and
# server-2 over the same dedicated link as virtual1, on port 7789.
resource servizi0 {

  #**********
  #
  # Protocol C: fully synchronous replication — writes complete only after
  # the peer confirms them on stable storage.
  protocol C;

  handlers {
    # Power the node off immediately on unrecoverable conditions (same
    # policy as resource virtual1).
    pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
    pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
    local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
    # NOTE(review): "fencing resource-and-stonith" is set below but this
    # peer-outdating handler is commented out — confirm that is intentional.
    #outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
    #pri-lost "echo pri-lost. Have a look at the log files. | mail -s 'DRBD Alert' root";
    # Notify someone in case DRBD split brained. 
    #split-brain "echo split-brain. drbdadm -- --discard-my-data connect $DRBD_RESOURCE ? | mail -s 'DRBD Alert' root";
    # Notify someone in case an online verify run found the backing devices out of sync.
    #out-of-sync "echo out-of-sync. drbdadm down $DRBD_RESOURCE. drbdadm ::::0 set-gi $DRBD_RESOURCE. drbdadm up $DRBD_RESOURCE. | mail -s 'DRBD Alert' root";
    #
    #before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
    #after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
  }

  startup {
    # Wait up to 120 s for the peer on boot (cluster healthy / degraded).
    wfc-timeout  120;
    degr-wfc-timeout 120;    # 2 minutes.
    # wait-after-sb;
    # become-primary-on both;
  }

  disk {
    # pass_on: hand lower-level I/O errors up to the mounted filesystem.
    on-io-error   pass_on;
    # Freeze I/O and expect the peer to be fenced/outdated on connection loss.
    # NOTE(review): requires a fence-peer handler (commented out above).
    fencing resource-and-stonith;
    # size 10G;
    # no-disk-flushes;
    # no-md-flushes;
    # Limit bios to one biovec entry — presumably a Xen-related workaround,
    # mirroring resource virtual1; verify it is still needed.
    max-bio-bvecs 1;
  }

  net {
    # sndbuf-size 512k;
    # timeout       60;    #  6 seconds  (unit = 0.1 seconds)
    # connect-int   10;    # 10 seconds  (unit = 1 second)
    # ping-int      10;    # 10 seconds  (unit = 1 second)
    # ping-timeout   5;    # 500 ms (unit = 0.1 seconds)
    # max-buffers     2048;
    # unplug-watermark   128;
    # max-epoch-size  2048;
    # ko-count 4;
    # allow-two-primaries;
    # Peer authentication via HMAC-SHA1 challenge/response.
    cram-hmac-alg "sha1";
    shared-secret "serviziDRBDcosasegretissima";
    # Automatic split-brain recovery policies (0/1/2 primaries after SB).
    after-sb-0pri discard-younger-primary;
    after-sb-1pri call-pri-lost-after-sb;
    after-sb-2pri call-pri-lost-after-sb;
    rr-conflict disconnect;
    # data-integrity-alg "md5";
  }

  syncer {
    # Resync rate cap (same as the common-section default).
    rate 50M;
    #after "r2";
    # Activity-log extents; odd/prime values are conventional for hashing.
    al-extents 257;
    # cpu-mask 15;
  }

  # Per-host sections: same drbd minor and port on both nodes, different
  # backing partitions; metadata stored inside the backing device.
  on server-1 {
    device     /dev/drbd0;
    disk       /dev/sda5;
    address    192.168.2.1:7789;
    meta-disk  internal;
  }

  on server-2 {
    device    /dev/drbd0;
    disk      /dev/sda3;
    address   192.168.2.2:7789;
    meta-disk internal;
  }
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 307 bytes
Desc: This is a digitally signed message part
URL: <http://lists.centos.org/pipermail/centos/attachments/20090728/0f4dc1cb/attachment-0003.sig>