package org.apache.cassandra.repair;
import java.net.InetAddress;
import java.util.*;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.Keyspace;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.tracing.Tracing;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.MerkleTrees;
import org.apache.cassandra.utils.Pair;
public class RepairJob extends AbstractFuture<RepairResult> implements Runnable
{
private static Logger logger = LoggerFactory.getLogger(RepairJob.class);
private final RepairSession session;
private final RepairJobDesc desc;
private final RepairParallelism parallelismDegree;
private final long repairedAt;
private final ListeningExecutorService taskExecutor;
public RepairJob(RepairSession session, String columnFamily)
{
this.session = session;
this.desc = new RepairJobDesc(session.parentRepairSession, session.getId(), session.keyspace, columnFamily, session.getRanges());
this.repairedAt = session.repairedAt;
this.taskExecutor = session.taskExecutor;
this.parallelismDegree = session.parallelismDegree;
}
public void run()
{
List<InetAddress> allEndpoints = new ArrayList<>(session.endpoints);
allEndpoints.add(FBUtilities.getBroadcastAddress());
ListenableFuture<List<TreeResponse>> validations;
if (parallelismDegree != RepairParallelism.PARALLEL)
{
List<ListenableFuture<InetAddress>> snapshotTasks = new ArrayList<>(allEndpoints.size());
for (InetAddress endpoint : allEndpoints)
{
SnapshotTask snapshotTask = new SnapshotTask(desc, endpoint);
snapshotTasks.add(snapshotTask);
taskExecutor.execute(snapshotTask);
}
ListenableFuture<List<InetAddress>> allSnapshotTasks = Futures.allAsList(snapshotTasks);
validations = Futures.transform(allSnapshotTasks, new AsyncFunction<List<InetAddress>, List<TreeResponse>>()
{
public ListenableFuture<List<TreeResponse>> apply(List<InetAddress> endpoints)
{
if (parallelismDegree == RepairParallelism.SEQUENTIAL)
return sendSequentialValidationRequest(endpoints);
else
return sendDCAwareValidationRequest(endpoints);
}
}, taskExecutor);
}
else
{
validations = sendValidationRequest(allEndpoints);
}
ListenableFuture<List<SyncStat>> syncResults = Futures.transform(validations, new AsyncFunction<List<TreeResponse>, List<SyncStat>>()
{
public ListenableFuture<List<SyncStat>> apply(List<TreeResponse> trees)
{
return Futures.allAsList(createSyncTasks(trees, FBUtilities.getLocalAddress()));
}
}, taskExecutor);
Futures.addCallback(syncResults, new FutureCallback<List<SyncStat>>()
{
public void onSuccess(List<SyncStat> stats)
{
logger.info("[repair #{}] {} is fully synced", session.getId(), desc.columnFamily);
SystemDistributedKeyspace.successfulRepairJob(session.getId(), desc.keyspace, desc.columnFamily);
set(new RepairResult(desc, stats));
}
public void onFailure(Throwable t)
{
logger.warn("[repair #{}] {} sync failed", session.getId(), desc.columnFamily);
SystemDistributedKeyspace.failedRepairJob(session.getId(), desc.keyspace, desc.columnFamily, t);
setException(t);
}
}, taskExecutor);
Futures.getUnchecked(validations);
}
@VisibleForTesting
List<SyncTask> createSyncTasks(List<TreeResponse> trees, InetAddress local)
{
List<SyncTask> syncTasks = new ArrayList<>();
for (int i = 0; i < trees.size() - 1; ++i)
{
TreeResponse r1 = trees.get(i);
for (int j = i + 1; j < trees.size(); ++j)
{
TreeResponse r2 = trees.get(j);
SyncTask task;
List<Range<Token>> differences = MerkleTrees.difference(r1.trees, r2.trees);
if (r1.endpoint.equals(local) || r2.endpoint.equals(local))
{
task = new LocalSyncTask(desc, r1.endpoint, r2.endpoint, differences, repairedAt, session.pullRepair);
}
else
{
task = new RemoteSyncTask(desc, r1.endpoint, r2.endpoint, differences);
session.waitForSync(Pair.create(desc, new NodePair(r1.endpoint, r2.endpoint)), (RemoteSyncTask) task);
}
syncTasks.add(task);
taskExecutor.submit(task);
}
}
return syncTasks;
}
private ListenableFuture<List<TreeResponse>> sendValidationRequest(Collection<InetAddress> endpoints)
{
String message = String.format("Requesting merkle trees for %s (to %s)", desc.columnFamily, endpoints);
logger.info("[repair #{}] {}", desc.sessionId, message);
Tracing.traceRepair(message);
int gcBefore = Keyspace.open(desc.keyspace).getColumnFamilyStore(desc.columnFamily).gcBefore(FBUtilities.nowInSeconds());
List<ListenableFuture<TreeResponse>> tasks = new ArrayList<>(endpoints.size());
for (InetAddress endpoint : endpoints)
{
ValidationTask task = new ValidationTask(desc, endpoint, gcBefore);
tasks.add(task);
session.waitForValidation(Pair.create(desc, endpoint), task);
taskExecutor.execute(task);
}
return Futures.allAsList(tasks);
}
private ListenableFuture<List<TreeResponse>> sendSequentialValidationRequest(Collection<InetAddress> endpoints)
{
String message = String.format("Requesting merkle trees for %s (to %s)", desc.columnFamily, endpoints);
logger.info("[repair #{}] {}", desc.sessionId, message);
Tracing.traceRepair(message);
int gcBefore = Keyspace.open(desc.keyspace).getColumnFamilyStore(desc.columnFamily).gcBefore(FBUtilities.nowInSeconds());
List<ListenableFuture<TreeResponse>> tasks = new ArrayList<>(endpoints.size());
Queue<InetAddress> requests = new LinkedList<>(endpoints);
InetAddress address = requests.poll();
ValidationTask firstTask = new ValidationTask(desc, address, gcBefore);
logger.info("Validating {}", address);
session.waitForValidation(Pair.create(desc, address), firstTask);
tasks.add(firstTask);
ValidationTask currentTask = firstTask;
while (requests.size() > 0)
{
final InetAddress nextAddress = requests.poll();
final ValidationTask nextTask = new ValidationTask(desc, nextAddress, gcBefore);
tasks.add(nextTask);
Futures.addCallback(currentTask, new FutureCallback<TreeResponse>()
{
public void onSuccess(TreeResponse result)
{
logger.info("Validating {}", nextAddress);
session.waitForValidation(Pair.create(desc, nextAddress), nextTask);
taskExecutor.execute(nextTask);
}
public void onFailure(Throwable t) {}
});
currentTask = nextTask;
}
taskExecutor.execute(firstTask);
return Futures.allAsList(tasks);
}
private ListenableFuture<List<TreeResponse>> sendDCAwareValidationRequest(Collection<InetAddress> endpoints)
{
String message = String.format("Requesting merkle trees for %s (to %s)", desc.columnFamily, endpoints);
logger.info("[repair #{}] {}", desc.sessionId, message);
Tracing.traceRepair(message);
int gcBefore = Keyspace.open(desc.keyspace).getColumnFamilyStore(desc.columnFamily).gcBefore(FBUtilities.nowInSeconds());
List<ListenableFuture<TreeResponse>> tasks = new ArrayList<>(endpoints.size());
Map<String, Queue<InetAddress>> requestsByDatacenter = new HashMap<>();
for (InetAddress endpoint : endpoints)
{
String dc = DatabaseDescriptor.getEndpointSnitch().getDatacenter(endpoint);
Queue<InetAddress> queue = requestsByDatacenter.get(dc);
if (queue == null)
{
queue = new LinkedList<>();
requestsByDatacenter.put(dc, queue);
}
queue.add(endpoint);
}
for (Map.Entry<String, Queue<InetAddress>> entry : requestsByDatacenter.entrySet())
{
Queue<InetAddress> requests = entry.getValue();
InetAddress address = requests.poll();
ValidationTask firstTask = new ValidationTask(desc, address, gcBefore);
logger.info("Validating {}", address);
session.waitForValidation(Pair.create(desc, address), firstTask);
tasks.add(firstTask);
ValidationTask currentTask = firstTask;
while (requests.size() > 0)
{
final InetAddress nextAddress = requests.poll();
final ValidationTask nextTask = new ValidationTask(desc, nextAddress, gcBefore);
tasks.add(nextTask);
Futures.addCallback(currentTask, new FutureCallback<TreeResponse>()
{
public void onSuccess(TreeResponse result)
{
logger.info("Validating {}", nextAddress);
session.waitForValidation(Pair.create(desc, nextAddress), nextTask);
taskExecutor.execute(nextTask);
}
public void onFailure(Throwable t) {}
});
currentTask = nextTask;
}
taskExecutor.execute(firstTask);
}
return Futures.allAsList(tasks);
}
}