Solr4.7源码分析-启动篇之Solr Cloud(二)——solr选举机制

Solr Cloud启动时,会通过选举为Overseer(观察节点)选出leader。在深入Solr的选举机制之前,先梳理一下ClusterState.json的层次结构,以及创建这个json文件时用到的代码结构。


"autocpltCollection":{          -----------------------------------DocCollection
"shard1":{                  -----------------------------------Slice
"core_node1":{          -----------------------------------Replica


// Fields at the top level of the cluster-state hierarchy (presumably ClusterState; the
// class header is not part of this excerpt).
private Integer zkClusterStateVersion;
// Keyed by collection name; per the inline note each DocCollection in turn maps slice name to Slice.
private final Map<String, DocCollection> collectionStates;  // Map<collectionName, Map<sliceName,Slice>>
private Set<String> liveNodes;
private final ZkStateReader stateReader;

// Fields at the collection level of the hierarchy (presumably DocCollection; the class
// header is not part of this excerpt).
private final String name;
private final Map<String, Slice> slices;
private final Map<String, Slice> activeSlices;
private final DocRouter router;
protected final Map<String,Object> propMap; // this one is declared in the parent class ZkNodeProps

// Fields at the shard level of the hierarchy (presumably Slice; the class header is not
// part of this excerpt). Each slice holds its replicas by String key and a single leader Replica.
private final String name;
private final DocRouter.Range range;
private final Integer replicationFactor;      // FUTURE: optional per-slice override of the collection replicationFactor
private final Map<String,Replica> replicas;
private final Replica leader;
private final String state;
private final String parent;
private final Map<String, RoutingRule> routingRules;



// Overseer election bootstrap: create an elector and an Overseer, wrap the Overseer in an
// election context tagged with this node's name, then join the election with
// replacement = false.
overseerElector = new LeaderElector(zkClient);
this.overseer = new Overseer(shardHandler, adminPath, zkStateReader);
ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName());
overseerElector.joinElection(context, false);


* Leader Election process. This class contains the logic by which a
* leader is chosen. First call {@link #setup(ElectionContext)} to ensure
* the election process is init'd. Next call
* {@link #joinElection(ElectionContext, boolean)} to start the leader election.
* The implementation follows the classic ZooKeeper recipe of creating an
* ephemeral, sequential node for each candidate and then looking at the set
* of such nodes - if the created node is the lowest sequential node, the
* candidate that created the node is the leader. If not, the candidate puts
* a watch on the next lowest node it finds, and if that node goes down,
* starts the whole process over by checking if it's the lowest sequential node, etc.


* Set up any ZooKeeper nodes needed for leader election.
public void setup(final ElectionContext context) throws InterruptedException,
KeeperException {
// Remember the context and build the election path: context.electionPath + ELECTION_NODE.
this.context = context;
String electZKPath = context.electionPath + LeaderElector.ELECTION_NODE;

// Hand the path to the command executor's ensureExists (presumably creates it when absent — confirm).
zkCmdExecutor.ensureExists(electZKPath, zkClient);

* Begin participating in the election process. Gets a new sequential number
* and begins watching the node with the sequence number before it, unless it
* is the lowest number, in which case, initiates the leader process. If the
* node that is watched goes down, check if we are the new lowest node, else
* watch the next lowest numbered node.
* @return sequential node number
public int joinElection(ElectionContext context, boolean replacement) throws KeeperException, InterruptedException, IOException {
// Author's note: the call made at this point is OverseerElectionContext.joinedElectionFired,
// which only closes the Overseer; its purpose is not yet understood.
// NOTE(review): that call itself does not appear in this excerpt.
// overseer_elect/election
final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE;

// Candidate id: the ZooKeeper session id, a dash, then the context id.
long sessionId = zkClient.getSolrZooKeeper().getSessionId();
String id = sessionId + "-" + context.id;
String leaderSeqPath = null;
boolean cont = true;
int tries = 0;
// Loop until the election node "<id>-n_..." has been created (cont is set to false).
while (cont) {
try {
// NOTE(review): the remaining arguments of this create call are missing from the excerpt.
leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null,
context.leaderSeqPath = leaderSeqPath;
cont = false;
} catch (ConnectionLossException e) {
// we don't know if we made our node or not...
List<String> entries = zkClient.getChildren(shardsElectZkPath, null, true);

// Scan the children for one whose node id equals ours.
boolean foundId = false;
for (String entry : entries) {
String nodeId = getNodeId(entry);
if (id.equals(nodeId)) {
// we did create our node...
foundId  = true;
// Not found: go around the loop again, failing with SERVER_ERROR once tries exceeds 20.
if (!foundId) {
cont = true;
if (tries++ > 20) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
// NOTE(review): the body of this try is missing from the excerpt.
try {
} catch (InterruptedException e2) {

} catch (KeeperException.NoNodeException e) {
// we must have failed in creating the election node - someone else must
// be working on it, lets try again
if (tries++ > 20) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
cont = true;
// NOTE(review): the body of this try is missing from the excerpt.
try {
} catch (InterruptedException e2) {
// Take the trailing number of the created path.
int seq = getSeq(leaderSeqPath);
// Decide right away whether this candidate is the leader.
checkIfIamLeader(seq, context, replacement);

return seq;

* Returns int given String of form n_0000000001 or n_0000000003, etc.
* @return sequence number
public static int getSeq(String nStringSequence) {
int seq = 0;
// The LEADER_SEQ pattern has to match the entire input; capture group 1 is parsed as the number.
Matcher m = LEADER_SEQ.matcher(nStringSequence);
if (m.matches()) {
seq = Integer.parseInt(m.group(1));
} else {
// No match: report the offending input.
throw new IllegalStateException("Could not find regex match in:"
+ nStringSequence);
return seq;

* Check if the candidate with the given n_* sequence number is the leader.
* If it is, set the leaderId on the leader zk node. If it is not, start
* watching the candidate that is in line before this one - if it goes down, check
* if this candidate is the leader again.
* @param replacement has someone else been the leader already?
private void checkIfIamLeader(final int seq, final ElectionContext context, boolean replacement) throws KeeperException,
InterruptedException, IOException {
// leader changed - close the overseer
// get all other numbers...
// overseer_elect/election
final String holdElectionPath = context.electionPath + ELECTION_NODE;
List<String> seqs = zkClient.getChildren(holdElectionPath, null, true);

// Sort by the trailing number.
// NOTE(review): the sort call itself does not appear in this excerpt.
// Extract the trailing numbers into a list of Integer.
List<Integer> intSeqs = getSeqs(seqs);
// NOTE(review): in this excerpt nothing exits after the warning below, so an empty list
// would reach intSeqs.get(0) — confirm against the full source.
if (intSeqs.size() == 0) {
log.warn("Our node is no longer in line to be leader");
// Our number is not greater than the first entry: this candidate becomes the leader.
if (seq <= intSeqs.get(0)) {
// first we delete the node advertising the old leader in case the ephem is still there
try {
zkClient.delete(context.leaderPath, -1, true);
} catch(Exception e) {
// fine

runIamLeaderProcess(context, replacement);
} else {
// I am not the leader - watch the node below me
int i = 1;
for (; i < intSeqs.size(); i++) {
int s = intSeqs.get(i);
if (seq < s) {
// we found who we come before - watch the guy in front
// Array index minus 2 locates the number in front of ours.
int index = i - 2;
// NOTE(review): in this excerpt nothing exits after the warning below, so a negative
// index would reach seqs.get(index) — confirm against the full source.
if (index < 0) {
log.warn("Our node is no longer in line to be leader");
try {
// Register a getData watcher on the node of the preceding number; when that node
// changes, the watcher re-checks whether this candidate is the leader.
zkClient.getData(holdElectionPath + "/" + seqs.get(index),
new Watcher() {

public void process(WatchedEvent event) {
// session events are not change events,
// and do not remove the watcher
// NOTE(review): in this excerpt nothing separates the None check from the re-check
// below — confirm against the full source.
if (EventType.None.equals(event.getType())) {
// am I the next leader?
try {
checkIfIamLeader(seq, context, true);
} catch (InterruptedException e) {
// Restore the interrupted status
// NOTE(review): only a log call is visible here; no re-interrupt appears in this excerpt.
log.warn("", e);
} catch (IOException e) {
log.warn("", e);
} catch (Exception e) {
log.warn("", e);

}, null, true);
} catch (KeeperException.SessionExpiredException e) {
throw e;
} catch (KeeperException e) {
log.warn("Failed setting watch", e);
// we couldn't set our watch - the node before us may already be down?
// we need to check if we are the leader again
checkIfIamLeader(seq, context, true);

* Sort n string sequence list.
public static void sortSeqs(List<String> seqs) {
// Sorts the list in place with a comparator built on the number getSeq extracts.
Collections.sort(seqs, new Comparator<String>() {

public int compare(String o1, String o2) {
// NOTE(review): the argument of compareTo is cut off in this excerpt (presumably the
// sequence number of o2 — confirm against the full source).
return Integer.valueOf(getSeq(o1)).compareTo(

* Returns int list given list of form n_0000000001, n_0000000003, etc.
* @return int seqs
private List<Integer> getSeqs(List<String> seqs) {
// Result list pre-sized to the number of input entries.
List<Integer> intSeqs = new ArrayList<Integer>(seqs.size());
// NOTE(review): the loop body is missing from this excerpt (presumably adds the number
// parsed from each entry — confirm against the full source).
for (String seq : seqs) {
return intSeqs;

// TODO: get this core param out of here
// Called by checkIfIamLeader once this candidate's number is not greater than the first one in line.
// NOTE(review): only the signature is included in this excerpt; the body is not shown.
protected void runIamLeaderProcess(final ElectionContext context, boolean weAreReplacement) throws KeeperException,
InterruptedException, IOException {

void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStartMs) throws KeeperException,
InterruptedException {
// The id logged here is not the local declared on the next statement (that one is not yet
// in scope); presumably it is a field of the enclosing context — confirm.
log.info("I am going to be the leader {}", id);
// Local id: the last path segment of leaderSeqPath.
final String id = leaderSeqPath
.substring(leaderSeqPath.lastIndexOf("/") + 1);
ZkNodeProps myProps = new ZkNodeProps("id", id);

// Create the node, writing the id into "/overseer_elect/leader/".
zkClient.makePath(leaderPath, ZkStateReader.toJSON(myProps),
CreateMode.EPHEMERAL, true);
if(pauseBeforeStartMs >0){
// NOTE(review): the body of this try is missing from the excerpt (presumably the pause itself).
try {
} catch (InterruptedException e) {
log.warn("Wait interrupted ", e);


