Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@
import com.linbit.linstor.storage.StorageException;
import com.linbit.linstor.storage.data.adapter.drbd.DrbdRscData;
import com.linbit.linstor.storage.interfaces.categories.resource.AbsRscLayerObject;
import com.linbit.linstor.storage.interfaces.categories.resource.VlmProviderObject;
import com.linbit.linstor.storage.kinds.DeviceLayerKind;
import com.linbit.linstor.storage.kinds.DeviceProviderKind;
import com.linbit.linstor.storage.utils.LayerUtils;
import com.linbit.linstor.tasks.AutoDiskfulTask;
import com.linbit.linstor.utils.layer.LayerRscUtils;
Expand Down Expand Up @@ -387,21 +389,84 @@ private Flux<ApiCallRc> toggleDiskInTransaction(

Resource rsc = ctrlApiDataLoader.loadRsc(nodeName, rscName, true);

// Allow retry of the same operation if the previous attempt failed
// (the requested flag remains set for retry on reconnection, but we should also allow manual retry)
// Also allow cancellation of a failed operation by requesting the opposite operation
if (hasDiskAddRequested(rsc))
{
throw new ApiRcException(ApiCallRcImpl.simpleEntry(
ApiConsts.FAIL_RSC_BUSY,
"Addition of disk to resource already requested",
true
));
if (removeDisk)
{
// User wants to cancel the failed add-disk operation and go back to diskless
// Use the existing disk removal flow to properly cleanup storage on satellite
errorReporter.logInfo(
"Toggle Disk cancel on %s/%s - cancelling failed DISK_ADD_REQUESTED, reverting to diskless",
nodeNameStr, rscNameStr);
unmarkDiskAddRequested(rsc);
// Also clear DISK_ADDING if it was set
unmarkDiskAdding(rsc);

// Set storage pool to diskless pool (overwrite the diskful pool that was set)
Props rscProps = ctrlPropsHelper.getProps(rsc);
rscProps.map().put(ApiConsts.KEY_STOR_POOL_NAME, LinStor.DISKLESS_STOR_POOL_NAME);

// Set DISK_REMOVE_REQUESTED to use the existing disk removal flow
// This will:
// 1. updateAndAdjustDisk sets DISK_REMOVING flag
// 2. Satellite sees DISK_REMOVING and deletes LUKS/storage devices
// 3. finishOperation rebuilds layer stack as diskless
// We keep the existing layer data so satellite can properly cleanup
markDiskRemoveRequested(rsc);

ctrlTransactionHelper.commit();

// Use existing disk removal flow - this will properly cleanup storage on satellite
return Flux
.<ApiCallRc>just(ApiCallRcImpl.singleApiCallRc(
ApiConsts.MODIFIED,
"Cancelling disk addition, reverting to diskless"
))
.concatWith(updateAndAdjustDisk(nodeName, rscName, true, toggleIntoTiebreakerRef, context))
.concatWith(ctrlRscDfnApiCallHandler.get().updateProps(rsc.getResourceDefinition()));
}
// If adding disk and DISK_ADD_REQUESTED is already set, treat as retry
// Simply retry the operation with existing layer data - satellite will handle it idempotently
// NOTE: We don't remove/recreate layer data here because removeLayerData() only deletes
// from controller DB without calling drbdadm down on satellite, leaving orphaned DRBD devices
errorReporter.logInfo(
"Toggle Disk retry on %s/%s - DISK_ADD_REQUESTED already set, retrying operation",
nodeNameStr, rscNameStr);
ctrlTransactionHelper.commit();
return Flux
.<ApiCallRc>just(new ApiCallRcImpl())
.concatWith(updateAndAdjustDisk(nodeName, rscName, false, toggleIntoTiebreakerRef, context))
.concatWith(ctrlRscDfnApiCallHandler.get().updateProps(rsc.getResourceDefinition()));
}
if (hasDiskRemoveRequested(rsc))
{
throw new ApiRcException(ApiCallRcImpl.simpleEntry(
ApiConsts.FAIL_RSC_BUSY,
"Removal of disk from resource already requested",
true
));
if (!removeDisk)
{
// User wants to cancel the failed remove-disk operation
errorReporter.logInfo(
"Toggle Disk cancel on %s/%s - cancelling failed DISK_REMOVE_REQUESTED",
nodeNameStr, rscNameStr);
unmarkDiskRemoveRequested(rsc);
ctrlTransactionHelper.commit();
return Flux.<ApiCallRc>just(
ApiCallRcImpl.singleApiCallRc(
ApiConsts.MODIFIED,
"Cancelled disk removal request"
)
);
}
// If removing disk and DISK_REMOVE_REQUESTED is already set, treat as retry
errorReporter.logInfo(
"Toggle Disk retry on %s/%s - DISK_REMOVE_REQUESTED already set, continuing operation",
nodeNameStr, rscNameStr);
ctrlTransactionHelper.commit();
return Flux
.<ApiCallRc>just(new ApiCallRcImpl())
.concatWith(updateAndAdjustDisk(nodeName, rscName, true, toggleIntoTiebreakerRef, context))
.concatWith(ctrlRscDfnApiCallHandler.get().updateProps(rsc.getResourceDefinition()));
}

if (!removeDisk && !ctrlVlmCrtApiHelper.isDiskless(rsc))
Expand All @@ -412,17 +477,43 @@ private Flux<ApiCallRc> toggleDiskInTransaction(
true
));
}
ResourceDefinition rscDfn = rsc.getResourceDefinition();
AccessContext peerCtx = peerAccCtx.get();

if (removeDisk && ctrlVlmCrtApiHelper.isDiskless(rsc))
{
// Resource is marked as diskless - check if it has orphaned storage layers that need cleanup
AbsRscLayerObject<Resource> layerData = getLayerData(peerCtx, rsc);
if (layerData != null && (LayerUtils.hasLayer(layerData, DeviceLayerKind.LUKS) ||
hasNonDisklessStorageLayer(layerData)))
{
// Resource is marked as diskless but has orphaned storage layers - need cleanup
// Use the existing disk removal flow to properly cleanup storage on satellite
errorReporter.logInfo(
"Toggle Disk cleanup on %s/%s - resource is diskless but has orphaned storage layers, cleaning up",
nodeNameStr, rscNameStr);

// Set DISK_REMOVE_REQUESTED to use the existing disk removal flow
// This will trigger proper satellite cleanup via DISK_REMOVING flag
markDiskRemoveRequested(rsc);

ctrlTransactionHelper.commit();

// Use existing disk removal flow - this will properly cleanup storage on satellite
return Flux
.<ApiCallRc>just(ApiCallRcImpl.singleApiCallRc(
ApiConsts.MODIFIED,
"Cleaning up orphaned storage layers"
))
.concatWith(updateAndAdjustDisk(nodeName, rscName, true, toggleIntoTiebreakerRef, context))
.concatWith(ctrlRscDfnApiCallHandler.get().updateProps(rsc.getResourceDefinition()));
}
throw new ApiRcException(ApiCallRcImpl.simpleEntry(
ApiConsts.WARN_RSC_ALREADY_DISKLESS,
"Resource already diskless",
true
));
}

ResourceDefinition rscDfn = rsc.getResourceDefinition();
AccessContext peerCtx = peerAccCtx.get();
if (removeDisk)
{
// Prevent removal of the last disk
Expand Down Expand Up @@ -1446,6 +1537,30 @@ private void unmarkDiskAdding(Resource rsc)
}
}

private void unmarkDiskAddRequested(Resource rsc)
{
try
{
rsc.getStateFlags().disableFlags(apiCtx, Resource.Flags.DISK_ADD_REQUESTED);
}
catch (AccessDeniedException | DatabaseException exc)
{
throw new ImplementationError(exc);
}
}

private void unmarkDiskRemoveRequested(Resource rsc)
{
try
{
rsc.getStateFlags().disableFlags(apiCtx, Resource.Flags.DISK_REMOVE_REQUESTED);
}
catch (AccessDeniedException | DatabaseException exc)
{
throw new ImplementationError(exc);
}
}

private void markDiskAdded(Resource rscData)
{
try
Expand Down Expand Up @@ -1511,6 +1626,41 @@ static AbsRscLayerObject<Resource> getLayerData(AccessContext accCtx, Resource r
return layerData;
}

/**
* Check if the layer stack has a non-diskless STORAGE layer.
* This is used to detect orphaned storage layers that need cleanup.
*/
private boolean hasNonDisklessStorageLayer(AbsRscLayerObject<Resource> layerDataRef)
{
boolean hasNonDiskless = false;
if (layerDataRef != null)
{
if (layerDataRef.getLayerKind() == DeviceLayerKind.STORAGE)
{
for (VlmProviderObject<Resource> vlmData : layerDataRef.getVlmLayerObjects().values())
{
if (vlmData.getProviderKind() != DeviceProviderKind.DISKLESS)
{
hasNonDiskless = true;
break;
}
}
}
if (!hasNonDiskless)
{
for (AbsRscLayerObject<Resource> child : layerDataRef.getChildren())
{
if (hasNonDisklessStorageLayer(child))
{
hasNonDiskless = true;
break;
}
}
}
}
return hasNonDiskless;
}

private LockGuard createLockGuard()
{
return lockGuardFactory.buildDeferred(LockType.WRITE, LockObj.NODES_MAP, LockObj.RSC_DFN_MAP);
Expand Down