diff --git a/cmd/containerd-shim-lcow-v2/service/service.go b/cmd/containerd-shim-lcow-v2/service/service.go index c44b4590db..72b685dc36 100644 --- a/cmd/containerd-shim-lcow-v2/service/service.go +++ b/cmd/containerd-shim-lcow-v2/service/service.go @@ -14,7 +14,7 @@ import ( "github.com/Microsoft/hcsshim/internal/shimdiag" sandboxsvc "github.com/containerd/containerd/api/runtime/sandbox/v1" - tasksvc "github.com/containerd/containerd/api/runtime/task/v2" + tasksvc "github.com/containerd/containerd/api/runtime/task/v3" "github.com/containerd/containerd/v2/core/runtime" "github.com/containerd/containerd/v2/pkg/namespaces" "github.com/containerd/containerd/v2/pkg/shutdown" @@ -91,7 +91,7 @@ func NewService(ctx context.Context, eventsPublisher shim.Publisher, sd shutdown // RegisterTTRPC registers the Task, Sandbox, and ShimDiag TTRPC services on // the provided server so that containerd can call into the shim over TTRPC. func (s *Service) RegisterTTRPC(server *ttrpc.Server) error { - tasksvc.RegisterTaskService(server, s) + tasksvc.RegisterTTRPCTaskService(server, s) sandboxsvc.RegisterTTRPCSandboxService(server, s) shimdiag.RegisterShimDiagService(server, s) return nil diff --git a/cmd/containerd-shim-lcow-v2/service/service_task.go b/cmd/containerd-shim-lcow-v2/service/service_task.go index 98ed3491d6..b1158082ae 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_task.go +++ b/cmd/containerd-shim-lcow-v2/service/service_task.go @@ -9,14 +9,14 @@ import ( "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" - "github.com/containerd/containerd/api/runtime/task/v2" + "github.com/containerd/containerd/api/runtime/task/v3" "github.com/containerd/errdefs/pkg/errgrpc" "go.opencensus.io/trace" "google.golang.org/protobuf/types/known/emptypb" ) // Ensure Service implements the TTRPCTaskService interface at compile time. -var _ task.TaskService = &Service{} +var _ task.TTRPCTaskService = &Service{} // State returns the current state of a task or process. // This method is part of the instrumentation layer and business logic is included in stateInternal. diff --git a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go index 9e417ba741..677b52f4c5 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go @@ -12,6 +12,7 @@ import ( container "github.com/Microsoft/hcsshim/internal/controller/linuxcontainer" "github.com/Microsoft/hcsshim/internal/controller/pod" "github.com/Microsoft/hcsshim/internal/controller/process" + "github.com/Microsoft/hcsshim/internal/controller/vm" "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" @@ -23,7 +24,7 @@ import ( "github.com/Microsoft/hcsshim/pkg/ctrdtaskapi" eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/runtime/task/v2" + "github.com/containerd/containerd/api/runtime/task/v3" "github.com/containerd/errdefs" "github.com/containerd/typeurl/v2" "github.com/opencontainers/runtime-spec/specs-go" @@ -69,10 +70,6 @@ func (s *Service) getPodController(podID string) (*pod.Controller, bool) { // stateInternal returns the current status of a process within a container. func (s *Service) stateInternal(_ context.Context, request *task.StateRequest) (*task.StateResponse, error) { - if err := s.ensureVMRunning(); err != nil { - return nil, err - } - // Look up the container controller for the requested container. ctrCtrl, err := s.getContainerController(request.ID) if err != nil { @@ -277,10 +274,6 @@ func (s *Service) startInternal(ctx context.Context, request *task.StartRequest) // deleteInternal deletes a process, container, or pod sandbox depending on the request. func (s *Service) deleteInternal(ctx context.Context, request *task.DeleteRequest) (*task.DeleteResponse, error) { - if err := s.ensureVMRunning(); err != nil { - return nil, err - } - // Look up the container controller for the target ID. ctrCtrl, err := s.getContainerController(request.ID) if err != nil { @@ -328,7 +321,7 @@ func (s *Service) deleteInternal(ctx context.Context, request *task.DeleteReques // left should be the sandbox container itself (request.ID). remaining := podCtrl.ListContainers() delete(remaining, request.ID) // exclude the sandbox container itself - if len(remaining) > 0 { + if len(remaining) > 0 && s.vmController.State() == vm.StateRunning { return nil, fmt.Errorf("cannot delete sandbox container %s: %d workload container(s) still exist in the pod: %w", request.ID, len(remaining), errdefs.ErrFailedPrecondition) } diff --git a/internal/controller/linuxcontainer/container.go b/internal/controller/linuxcontainer/container.go index 72cbb16df5..03320485f6 100644 --- a/internal/controller/linuxcontainer/container.go +++ b/internal/controller/linuxcontainer/container.go @@ -4,6 +4,7 @@ package linuxcontainer import ( "context" + "errors" "fmt" "sync" "time" @@ -12,7 +13,6 @@ import ( "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" "github.com/Microsoft/hcsshim/internal/controller/process" "github.com/Microsoft/hcsshim/internal/gcs" - "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" @@ -25,7 +25,7 @@ import ( "github.com/Microsoft/go-winio/pkg/guid" eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/runtime/task/v2" + "github.com/containerd/containerd/api/runtime/task/v3" containerdtypes "github.com/containerd/containerd/api/types/task" "github.com/containerd/errdefs" "github.com/containerd/typeurl/v2" @@ -220,6 +220,14 @@ func (c *Controller) closeContainer() { } } +// isResourceAlreadyReleased reports whether a teardown failure indicates the +// resource is already gone — either the in-guest agent died (so guest-side +// state went with it) or the VM itself has exited (so host-side state went +// with it). Either way, teardown can move on instead of retrying. +func isResourceAlreadyReleased(err error) bool { + return errors.Is(err, gcs.ErrBridgeClosed) || vmutils.IsVMNotAvailableError(err) +} + // releaseResources undoes each allocation in reverse order. // It is idempotent — subsequent calls after the first are no-ops. func (c *Controller) releaseResources(ctx context.Context) error { @@ -236,7 +244,7 @@ func (c *Controller) releaseResources(ctx context.Context) error { ContainerRootPath: c.layers.rootfsPath, Layers: hcsLayers, ScratchPath: c.layers.scratch.guestPath, - }); err != nil { + }); err != nil && !isResourceAlreadyReleased(err) { return fmt.Errorf("remove combined layers from guest: %w", err) } @@ -248,7 +256,7 @@ func (c *Controller) releaseResources(ctx context.Context) error { // unmapped on a prior call. var zeroGUID guid.GUID if c.layers != nil && c.layers.scratch.id != zeroGUID { - if err := c.scsi.UnmapFromGuest(ctx, c.layers.scratch.id); err != nil { + if err := c.scsi.UnmapFromGuest(ctx, c.layers.scratch.id); err != nil && !isResourceAlreadyReleased(err) { return fmt.Errorf("unmap scratch layer: %w", err) } c.layers.scratch = scsiReservation{} @@ -258,7 +266,7 @@ func (c *Controller) releaseResources(ctx context.Context) error { // resumes from the first failure. if c.layers != nil { for i, layer := range c.layers.roLayers { - if err := c.scsi.UnmapFromGuest(ctx, layer.id); err != nil { + if err := c.scsi.UnmapFromGuest(ctx, layer.id); err != nil && !isResourceAlreadyReleased(err) { c.layers.roLayers = c.layers.roLayers[i:] return fmt.Errorf("unmap ro layer: %w", err) } @@ -267,7 +275,7 @@ func (c *Controller) releaseResources(ctx context.Context) error { // Unmap additional SCSI mounts. for i, id := range c.scsiResources { - if err := c.scsi.UnmapFromGuest(ctx, id); err != nil { + if err := c.scsi.UnmapFromGuest(ctx, id); err != nil && !isResourceAlreadyReleased(err) { c.scsiResources = c.scsiResources[i:] return fmt.Errorf("unmap scsi resource: %w", err) } @@ -275,7 +283,7 @@ func (c *Controller) releaseResources(ctx context.Context) error { // Unmap Plan9 shares. for i, id := range c.plan9Resources { - if err := c.plan9.UnmapFromGuest(ctx, id); err != nil { + if err := c.plan9.UnmapFromGuest(ctx, id); err != nil && !isResourceAlreadyReleased(err) { c.plan9Resources = c.plan9Resources[i:] return fmt.Errorf("unmap plan9 share: %w", err) } @@ -283,7 +291,7 @@ func (c *Controller) releaseResources(ctx context.Context) error { // Remove VPCI devices. for i, id := range c.devices { - if err := c.vpci.RemoveFromVM(ctx, id); err != nil { + if err := c.vpci.RemoveFromVM(ctx, id); err != nil && !isResourceAlreadyReleased(err) { c.devices = c.devices[i:] return fmt.Errorf("remove vpci device: %w", err) } @@ -295,7 +303,7 @@ func (c *Controller) releaseResources(ctx context.Context) error { if !c.isContainerStateDeleted && c.guest.Capabilities().IsDeleteContainerStateSupported() { // GCS bridge evicts the container from its host-state map even if the inner Delete fails, // so retries will always return not-found. - if err := c.guest.DeleteContainerState(ctx, c.gcsContainerID); err != nil && !hcs.IsNotExist(err) { + if err := c.guest.DeleteContainerState(ctx, c.gcsContainerID); err != nil && !isResourceAlreadyReleased(err) { return fmt.Errorf("delete container state: %w", err) } diff --git a/internal/controller/linuxcontainer/container_test.go b/internal/controller/linuxcontainer/container_test.go index 6f9ed6ece7..1eb20d2269 100644 --- a/internal/controller/linuxcontainer/container_test.go +++ b/internal/controller/linuxcontainer/container_test.go @@ -606,38 +606,71 @@ func TestReleaseResources_Idempotent(t *testing.T) { } } -// TestReleaseResources_StopsOnFirstError verifies that releaseResources -// returns the first error encountered and does not proceed to subsequent -// resource categories. +// TestReleaseResources_StopsOnFirstError verifies the chain's response to an +// error at the SCSI-extras unmap leg. A real failure short-circuits the chain +// (plan9/vpci skipped, slice retained for retry). Any "already-gone" error — +// the GCS bridge being closed or the VM no longer being available — is +// tolerated and the chain proceeds through the remaining categories. func TestReleaseResources_StopsOnFirstError(t *testing.T) { t.Parallel() - c, scsiCtrl, _, _, _ := newContainerTestController(t) - scsiGUID, _ := guid.NewV4() - plan9GUID, _ := guid.NewV4() - deviceGUID, _ := guid.NewV4() + cases := []struct { + name string + scsiErr error + wantStops bool + }{ + {name: "RealError_StopsChain", scsiErr: errUnmapSCSI, wantStops: true}, + {name: "BridgeClosed_Continues", scsiErr: fmt.Errorf("transport gone: %w", gcs.ErrBridgeClosed)}, + {name: "ComputeSystemDoesNotExist_Continues", scsiErr: fmt.Errorf("hcs::System::Modify: %w", hcs.ErrComputeSystemDoesNotExist)}, + {name: "VmcomputeAlreadyStopped_Continues", scsiErr: fmt.Errorf("hcs::System::Modify: %w", hcs.ErrVmcomputeAlreadyStopped)}, + {name: "VmcomputeOperationInvalidState_Continues", scsiErr: fmt.Errorf("hcs::System::Modify: %w", hcs.ErrVmcomputeOperationInvalidState)}, + {name: "AlreadyClosed_Continues", scsiErr: fmt.Errorf("hcs::System::Modify: %w", hcs.ErrAlreadyClosed)}, + } - c.scsiResources = []guid.GUID{scsiGUID} - c.plan9Resources = []guid.GUID{plan9GUID} - c.devices = []guid.GUID{deviceGUID} + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + c, scsiCtrl, plan9Ctrl, vpciCtrl, guestCtrl := newContainerTestController(t) - // SCSI unmap fails; plan9/vpci should not be invoked. - scsiCtrl.EXPECT().UnmapFromGuest(gomock.Any(), scsiGUID).Return(errUnmapSCSI) + scsiGUID, _ := guid.NewV4() + plan9GUID, _ := guid.NewV4() + deviceGUID, _ := guid.NewV4() - err := c.releaseResources(t.Context()) - if !errors.Is(err, errUnmapSCSI) { - t.Fatalf("releaseResources error = %v, want %v", err, errUnmapSCSI) - } + c.scsiResources = []guid.GUID{scsiGUID} + c.plan9Resources = []guid.GUID{plan9GUID} + c.devices = []guid.GUID{deviceGUID} - // Failed scsi entry is retained for retry; plan9 and devices unchanged. - if len(c.scsiResources) != 1 { - t.Errorf("scsiResources len = %d, want 1 (retained for retry)", len(c.scsiResources)) - } - if len(c.plan9Resources) != 1 { - t.Errorf("plan9Resources len = %d, want 1 (untouched)", len(c.plan9Resources)) - } - if len(c.devices) != 1 { - t.Errorf("devices len = %d, want 1 (untouched)", len(c.devices)) + scsiCtrl.EXPECT().UnmapFromGuest(gomock.Any(), scsiGUID).Return(tc.scsiErr) + + if !tc.wantStops { + // Tolerated error: the chain must proceed through plan9, vpci, + // and the DeleteContainerState capability gate. + plan9Ctrl.EXPECT().UnmapFromGuest(gomock.Any(), plan9GUID).Return(nil) + vpciCtrl.EXPECT().RemoveFromVM(gomock.Any(), deviceGUID).Return(nil) + guestCtrl.EXPECT().Capabilities().Return(&gcs.LCOWGuestDefinedCapabilities{}) + } + + err := c.releaseResources(t.Context()) + if tc.wantStops { + if !errors.Is(err, tc.scsiErr) { + t.Fatalf("releaseResources error = %v, want %v", err, tc.scsiErr) + } + // Failed scsi entry retained; plan9 and devices untouched. + if len(c.scsiResources) != 1 { + t.Errorf("scsiResources len = %d, want 1 (retained for retry)", len(c.scsiResources)) + } + if len(c.plan9Resources) != 1 { + t.Errorf("plan9Resources len = %d, want 1 (untouched)", len(c.plan9Resources)) + } + if len(c.devices) != 1 { + t.Errorf("devices len = %d, want 1 (untouched)", len(c.devices)) + } + return + } + if err != nil { + t.Fatalf("releaseResources error = %v, want nil for tolerated error", err) + } + }) } } @@ -860,32 +893,47 @@ func TestReleaseResources_DeleteContainerStateFails(t *testing.T) { } } -// TestReleaseResources_DeleteContainerStateNotFoundIsSuccess verifies that a -// "compute system does not exist" error from DeleteContainerState is treated -// as success. This matters because the GCS bridge removes the container from -// its host-state map even when its inner Delete() fails, so any retry of the -// RPC will always come back with "not found" — looping forever would otherwise -// be the consequence. -func TestReleaseResources_DeleteContainerStateNotFoundIsSuccess(t *testing.T) { +// TestReleaseResources_DeleteContainerState_ToleratesAlreadyGone verifies +// that any error indicating the resource is already gone is treated as a +// successful delete: the GCS bridge being closed (so the in-guest agent +// cannot answer) and any of the HCS conditions that surface when the VM has +// already exited or is in an invalid state for further modifications. In all +// cases the chain must complete and isContainerStateDeleted must flip so a +// retry does not re-issue the RPC. +func TestReleaseResources_DeleteContainerState_ToleratesAlreadyGone(t *testing.T) { t.Parallel() - c, _, _, _, guestCtrl := newContainerTestController(t) - caps := &gcs.LCOWGuestDefinedCapabilities{} - caps.DeleteContainerStateSupported = true + cases := []struct { + name string + err error + }{ + {name: "BridgeClosed", err: fmt.Errorf("transport gone: %w", gcs.ErrBridgeClosed)}, + {name: "ComputeSystemDoesNotExist", err: fmt.Errorf("guest RPC failure: %w", hcs.ErrComputeSystemDoesNotExist)}, + {name: "VmcomputeAlreadyStopped", err: fmt.Errorf("guest RPC failure: %w", hcs.ErrVmcomputeAlreadyStopped)}, + {name: "VmcomputeOperationInvalidState", err: fmt.Errorf("guest RPC failure: %w", hcs.ErrVmcomputeOperationInvalidState)}, + {name: "AlreadyClosed", err: fmt.Errorf("guest RPC failure: %w", hcs.ErrAlreadyClosed)}, + } - guestCtrl.EXPECT().Capabilities().Return(caps) - // The bridge's response Result code is wrapped through windows.Errno - // (= syscall.Errno) so errors.Is against hcs.ErrComputeSystemDoesNotExist - // matches. - guestCtrl.EXPECT(). - DeleteContainerState(gomock.Any(), c.gcsContainerID). - Return(fmt.Errorf("guest RPC failure: %w", hcs.ErrComputeSystemDoesNotExist)) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + c, _, _, _, guestCtrl := newContainerTestController(t) - if err := c.releaseResources(t.Context()); err != nil { - t.Fatalf("releaseResources returned error for not-found: %v", err) - } - if !c.isContainerStateDeleted { - t.Fatal("isContainerStateDeleted should be true even when the guest reports the container is gone") + caps := &gcs.LCOWGuestDefinedCapabilities{} + caps.DeleteContainerStateSupported = true + + guestCtrl.EXPECT().Capabilities().Return(caps) + guestCtrl.EXPECT(). + DeleteContainerState(gomock.Any(), c.gcsContainerID). + Return(tc.err) + + if err := c.releaseResources(t.Context()); err != nil { + t.Fatalf("releaseResources returned error: %v", err) + } + if !c.isContainerStateDeleted { + t.Fatal("isContainerStateDeleted should be true when the guest reports the container is gone") + } + }) } } diff --git a/internal/controller/network/network_lcow.go b/internal/controller/network/network_lcow.go index 1ec3fde0eb..4452f7539a 100644 --- a/internal/controller/network/network_lcow.go +++ b/internal/controller/network/network_lcow.go @@ -4,12 +4,15 @@ package network import ( "context" + "errors" "fmt" "github.com/Microsoft/hcsshim/hcn" + "github.com/Microsoft/hcsshim/internal/gcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" ) // guestNetwork exposes linux guest network operations. @@ -81,7 +84,7 @@ func (c *Controller) removeEndpointFromGuestNamespace(ctx context.Context, nicID if err := c.guestNetwork.RemoveNetworkInterface(ctx, &guestresource.LCOWNetworkAdapter{ NamespaceID: c.namespaceID, ID: nicID, - }); err != nil { + }); err != nil && !errors.Is(err, gcs.ErrBridgeClosed) { return fmt.Errorf("remove NIC %s from guest: %w", nicID, err) } @@ -92,7 +95,7 @@ func (c *Controller) removeEndpointFromGuestNamespace(ctx context.Context, nicID if err := c.vmNetwork.RemoveNIC(ctx, nicID, &hcsschema.NetworkAdapter{ EndpointId: endpoint.Id, MacAddress: endpoint.MacAddress, - }); err != nil { + }); err != nil && !vmutils.IsVMNotAvailableError(err) { return fmt.Errorf("remove NIC %s from host (endpoint %s): %w", nicID, endpoint.Id, err) } diff --git a/internal/controller/network/network_lcow_test.go b/internal/controller/network/network_lcow_test.go index 819e94448b..0ae57eaf42 100644 --- a/internal/controller/network/network_lcow_test.go +++ b/internal/controller/network/network_lcow_test.go @@ -5,6 +5,7 @@ package network import ( "context" "errors" + "fmt" "testing" "go.uber.org/mock/gomock" @@ -13,6 +14,7 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/network/mocks" "github.com/Microsoft/hcsshim/internal/gcs" "github.com/Microsoft/hcsshim/internal/guest/prot" + "github.com/Microsoft/hcsshim/internal/hcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) @@ -191,6 +193,30 @@ func TestLCOW_RemoveEndpoint_GuestFails_HostNotCalled(t *testing.T) { } } +// TestLCOW_RemoveEndpoint_BridgeClosed_HostStillCalled verifies that when the +// guest-side removal fails because the bridge is closed (the GCS is gone and +// its state with it), the controller still hot-removes the NIC from the host +// so cleanup completes instead of stalling on a doomed retry. +func TestLCOW_RemoveEndpoint_BridgeClosed_HostStillCalled(t *testing.T) { + ctrl := gomock.NewController(t) + c, vm, guest := newLCOWController(t, ctrl, true) + + ep := newLCOWEndpoint("eth0") + + gomock.InOrder( + guest.EXPECT().RemoveNetworkInterface(gomock.Any(), gomock.Any()). + Return(fmt.Errorf("transport gone: %w", gcs.ErrBridgeClosed)), + vm.EXPECT().RemoveNIC(gomock.Any(), "nic-1", &hcsschema.NetworkAdapter{ + EndpointId: ep.Id, + MacAddress: ep.MacAddress, + }).Return(nil), + ) + + if err := c.removeEndpointFromGuestNamespace(context.Background(), "nic-1", ep); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + // TestLCOW_RemoveEndpoint_NoNamespaceSupport_HostOnly verifies that when the // guest never received the namespace, the controller skips the guest-side // removal and only hot-removes the NIC from the host. @@ -211,6 +237,45 @@ func TestLCOW_RemoveEndpoint_NoNamespaceSupport_HostOnly(t *testing.T) { } } +// TestLCOW_RemoveEndpoint_HostFails_VMGone_Tolerated verifies that when the +// host-side RemoveNIC fails because the UVM has already exited (HCS reports +// the system as gone / already stopped / invalid state / handle closed), the +// controller treats the failure as success. The NIC is destroyed alongside +// the VM, so propagating the error would only leak the cached endpoint +// mapping and block teardown — symmetric with the bridge-closed tolerance on +// the guest side. +func TestLCOW_RemoveEndpoint_HostFails_VMGone_Tolerated(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + err error + }{ + {"ComputeSystemDoesNotExist", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrComputeSystemDoesNotExist)}, + {"VmcomputeAlreadyStopped", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrVmcomputeAlreadyStopped)}, + {"VmcomputeOperationInvalidState", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrVmcomputeOperationInvalidState)}, + {"AlreadyClosed", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrAlreadyClosed)}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + c, vm, guest := newLCOWController(t, ctrl, true) + + ep := newLCOWEndpoint("eth0") + + gomock.InOrder( + guest.EXPECT().RemoveNetworkInterface(gomock.Any(), gomock.Any()).Return(nil), + vm.EXPECT().RemoveNIC(gomock.Any(), "nic-1", gomock.Any()).Return(tc.err), + ) + + if err := c.removeEndpointFromGuestNamespace(context.Background(), "nic-1", ep); err != nil { + t.Fatalf("expected VM-gone error from host RemoveNIC to be tolerated, got: %v", err) + } + }) + } +} + // ───────────────────────────────────────────────────────────────────────────── // Teardown tests (LCOW: removeNetNSInsideGuest is a no-op so full Teardown // is exercisable end-to-end without HNS) diff --git a/internal/controller/network/network_wcow.go b/internal/controller/network/network_wcow.go index c9911351d2..55a9a8ef69 100644 --- a/internal/controller/network/network_wcow.go +++ b/internal/controller/network/network_wcow.go @@ -4,13 +4,16 @@ package network import ( "context" + "errors" "fmt" "github.com/Microsoft/hcsshim/hcn" + "github.com/Microsoft/hcsshim/internal/gcs" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" "github.com/sirupsen/logrus" ) @@ -55,7 +58,10 @@ func (c *Controller) removeNetNSInsideGuest(ctx context.Context, namespaceID str return fmt.Errorf("get network namespace %s: %w", namespaceID, err) } - if err := c.guestNetwork.RemoveNetworkNamespace(ctx, hcnNamespace); err != nil { + // If the GCS bridge is already closed (e.g. the guest agent crashed), the + // namespace will be torn down with the VM, so treat that as success and let + // teardown continue. + if err = c.guestNetwork.RemoveNetworkNamespace(ctx, hcnNamespace); err != nil && !errors.Is(err, gcs.ErrBridgeClosed) { return fmt.Errorf("remove network namespace %s from guest: %w", namespaceID, err) } } @@ -119,7 +125,7 @@ func (c *Controller) removeEndpointFromGuestNamespace(ctx context.Context, nicID nicID, guestrequest.RequestTypeRemove, nil, - ); err != nil { + ); err != nil && !errors.Is(err, gcs.ErrBridgeClosed) { return fmt.Errorf("remove NIC %s from guest (endpoint %s): %w", nicID, endpoint.Id, err) } @@ -129,7 +135,7 @@ func (c *Controller) removeEndpointFromGuestNamespace(ctx context.Context, nicID if err := c.vmNetwork.RemoveNIC(ctx, nicID, &hcsschema.NetworkAdapter{ EndpointId: endpoint.Id, MacAddress: endpoint.MacAddress, - }); err != nil { + }); err != nil && !vmutils.IsVMNotAvailableError(err) { return fmt.Errorf("remove NIC %s from host (endpoint %s): %w", nicID, endpoint.Id, err) } diff --git a/internal/controller/network/network_wcow_test.go b/internal/controller/network/network_wcow_test.go index a68e0a8dbe..5eb77e31e7 100644 --- a/internal/controller/network/network_wcow_test.go +++ b/internal/controller/network/network_wcow_test.go @@ -5,6 +5,7 @@ package network import ( "context" "errors" + "fmt" "testing" "go.uber.org/mock/gomock" @@ -12,6 +13,7 @@ import ( "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/controller/network/mocks" "github.com/Microsoft/hcsshim/internal/gcs" + "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" @@ -196,6 +198,72 @@ func TestWCOW_RemoveEndpoint_GuestFails_HostNotCalled(t *testing.T) { } } +// TestWCOW_RemoveEndpoint_BridgeClosed_HostStillCalled verifies that when the +// guest-side removal fails because the GCS bridge is closed (the guest agent +// is gone and its state with it), the controller still hot-removes the NIC +// from the host so cleanup completes instead of stalling on a doomed retry. +// Symmetric with the VM-gone tolerance on the host side. +func TestWCOW_RemoveEndpoint_BridgeClosed_HostStillCalled(t *testing.T) { + ctrl := gomock.NewController(t) + c, vm, guest := newWCOWController(t, ctrl, true) + + ep := newWCOWEndpoint("eth0") + + gomock.InOrder( + guest.EXPECT().RemoveNetworkInterface( + gomock.Any(), "nic-1", guestrequest.RequestTypeRemove, gomock.Nil(), + ).Return(fmt.Errorf("transport gone: %w", gcs.ErrBridgeClosed)), + vm.EXPECT().RemoveNIC(gomock.Any(), "nic-1", &hcsschema.NetworkAdapter{ + EndpointId: ep.Id, + MacAddress: ep.MacAddress, + }).Return(nil), + ) + + if err := c.removeEndpointFromGuestNamespace(context.Background(), "nic-1", ep); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// TestWCOW_RemoveEndpoint_HostFails_VMGone_Tolerated verifies that when the +// host-side RemoveNIC fails because the UVM has already exited (HCS reports +// the system as gone / already stopped / invalid state / handle closed), the +// controller treats the failure as success. The NIC is destroyed alongside +// the VM, so propagating the error would only leak the cached endpoint +// mapping and block teardown. +func TestWCOW_RemoveEndpoint_HostFails_VMGone_Tolerated(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + err error + }{ + {"ComputeSystemDoesNotExist", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrComputeSystemDoesNotExist)}, + {"VmcomputeAlreadyStopped", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrVmcomputeAlreadyStopped)}, + {"VmcomputeOperationInvalidState", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrVmcomputeOperationInvalidState)}, + {"AlreadyClosed", fmt.Errorf("hcs::System::Modify: %w", hcs.ErrAlreadyClosed)}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + c, vm, guest := newWCOWController(t, ctrl, true) + + ep := newWCOWEndpoint("eth0") + + gomock.InOrder( + guest.EXPECT().RemoveNetworkInterface( + gomock.Any(), "nic-1", guestrequest.RequestTypeRemove, gomock.Nil(), + ).Return(nil), + vm.EXPECT().RemoveNIC(gomock.Any(), "nic-1", gomock.Any()).Return(tc.err), + ) + + if err := c.removeEndpointFromGuestNamespace(context.Background(), "nic-1", ep); err != nil { + t.Fatalf("expected VM-gone error from host RemoveNIC to be tolerated, got: %v", err) + } + }) + } +} + // ───────────────────────────────────────────────────────────────────────────── // Teardown tests // diff --git a/internal/controller/process/process.go b/internal/controller/process/process.go index b35038762f..43ff044d33 100644 --- a/internal/controller/process/process.go +++ b/internal/controller/process/process.go @@ -15,7 +15,7 @@ import ( "github.com/Microsoft/hcsshim/internal/logfields" eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/runtime/task/v2" + "github.com/containerd/containerd/api/runtime/task/v3" "github.com/containerd/errdefs" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" diff --git a/internal/controller/vm/vm.go b/internal/controller/vm/vm.go index d623ef2d7f..68ac56dcb4 100644 --- a/internal/controller/vm/vm.go +++ b/internal/controller/vm/vm.go @@ -342,6 +342,7 @@ func (c *Controller) waitForVMExit(ctx context.Context) { // copy the original to prevent it affecting the background wait go routine ctx = context.WithoutCancel(ctx) _ = c.uvm.Wait(ctx) + // Once the VM has exited, attempt to transition to Terminated. // This may be a no-op if TerminateVM already ran concurrently and // transitioned the state first — log the discarded error so that @@ -350,6 +351,15 @@ func (c *Controller) waitForVMExit(ctx context.Context) { if c.vmState != StateTerminated { c.vmState = StateTerminated } + + // Force the bridge to collapse so in-flight container/process waits are + // released. CloseConnection is idempotent. + if c.guest != nil { + if err := c.guest.CloseConnection(); err != nil { + log.G(ctx).WithError(err).Warn("close guest connection after vm exit failed") + } + } + c.mu.Unlock() } diff --git a/internal/gcs/bridge.go b/internal/gcs/bridge.go index b03b555662..5b611f262e 100644 --- a/internal/gcs/bridge.go +++ b/internal/gcs/bridge.go @@ -60,7 +60,7 @@ type bridge struct { waitCh chan struct{} } -var errBridgeClosed = fmt.Errorf("bridge closed: %w", net.ErrClosed) +var ErrBridgeClosed = fmt.Errorf("bridge closed: %w", net.ErrClosed) const ( // bridgeFailureTimeout is the default value for bridge.Timeout @@ -103,9 +103,11 @@ func (brdg *bridge) kill(err error) { } brdg.closed = true brdg.mu.Unlock() - brdg.brdgErr = err if err != nil { - brdg.log.WithError(err).Error("bridge forcibly terminating") + // Wrap the cause so callers can recognize this as a closed-bridge + // failure and stop retrying. + brdg.brdgErr = fmt.Errorf("%w: %w", ErrBridgeClosed, err) + brdg.log.WithError(brdg.brdgErr).Error("bridge forcibly terminating") } else { brdg.log.Debug("bridge terminating") } @@ -147,7 +149,7 @@ func (brdg *bridge) AsyncRPC(ctx context.Context, proc prot.RPCProc, req request case <-brdg.waitCh: err := brdg.brdgErr if err == nil { - err = errBridgeClosed + err = ErrBridgeClosed } return nil, err case <-ctx.Done(): @@ -244,7 +246,7 @@ func (brdg *bridge) recvLoopRoutine() { brdg.rpcs = nil brdg.mu.Unlock() for _, call := range rpcs { - call.complete(errBridgeClosed) + call.complete(ErrBridgeClosed) } } @@ -422,7 +424,7 @@ func (brdg *bridge) sendRPC(buf *bytes.Buffer, enc *json.Encoder, call *rpc) err brdg.mu.Lock() if brdg.rpcs == nil { brdg.mu.Unlock() - call.complete(errBridgeClosed) + call.complete(ErrBridgeClosed) return nil } id := brdg.nextID diff --git a/internal/gcs/bridge_test.go b/internal/gcs/bridge_test.go index c1b3c1ab55..61faea9256 100644 --- a/internal/gcs/bridge_test.go +++ b/internal/gcs/bridge_test.go @@ -147,8 +147,34 @@ func TestBridgeRPCBridgeClosed(t *testing.T) { eerr := errors.New("forcibly terminated") b.kill(eerr) err := b.RPC(context.Background(), prot.RPCCreate, nil, nil, false) - if err != eerr { //nolint:errorlint - t.Fatal("unexpected: ", err) + // Must wrap both the transport error and ErrBridgeClosed. + if !errors.Is(err, eerr) { + t.Fatalf("expected err to wrap eerr; got: %v", err) + } + if !errors.Is(err, ErrBridgeClosed) { + t.Fatalf("expected err to wrap ErrBridgeClosed; got: %v", err) + } +} + +// TestBridgeKillWrapsSentinel verifies that once the bridge has been killed, +// any subsequent or in-flight operation surfaces an error recognizable as a +// closed bridge, so cleanup paths can safely give up after the guest dies +// instead of retrying forever. +func TestBridgeKillWrapsSentinel(t *testing.T) { + b := startReflectedBridge(t, 0) + transportErr := errors.New("bridge read failed: header read: EOF") + b.kill(transportErr) + + if err := b.Wait(); !errors.Is(err, ErrBridgeClosed) || !errors.Is(err, transportErr) { + t.Fatalf("Wait(): expected err to wrap both ErrBridgeClosed and transportErr; got: %v", err) + } + + err := b.RPC(context.Background(), prot.RPCCreate, &testReq{}, &testResp{}, false) + if !errors.Is(err, ErrBridgeClosed) { + t.Fatalf("post-kill RPC: expected errors.Is(err, ErrBridgeClosed); got: %v", err) + } + if !errors.Is(err, transportErr) { + t.Fatalf("post-kill RPC: expected errors.Is(err, transportErr); got: %v", err) } } diff --git a/internal/vm/vmutils/gcs_logs.go b/internal/vm/vmutils/gcs_logs.go index c680dfcd76..62ce3c8cb3 100644 --- a/internal/vm/vmutils/gcs_logs.go +++ b/internal/vm/vmutils/gcs_logs.go @@ -63,8 +63,12 @@ func ParseGCSLogrus(vmID string) OutputHandler { if err != nil { // Handle decoding errors, EOF, or disconnections // Log the error unless it's an expected EOF or network disconnect - // (WSAECONNABORTED or WSAECONNRESET indicate expected shutdown/disconnect) - if !errors.Is(err, io.EOF) && !hcs.IsAny(err, windows.WSAECONNABORTED, windows.WSAECONNRESET) { + // (WSAECONNABORTED or WSAECONNRESET indicate expected shutdown/disconnect). + // A *json.SyntaxError means GCS wrote a non-JSON line (e.g. a Go + // runtime panic dump). The trailing-bytes read below logs the + // raw payload as "gcs terminated", so skip the noisy parse error. + var syntaxErr *json.SyntaxError + if !errors.Is(err, io.EOF) && !hcs.IsAny(err, windows.WSAECONNABORTED, windows.WSAECONNRESET) && !errors.As(err, &syntaxErr) { logrus.WithFields(logrus.Fields{ logfields.UVMID: vmID, logrus.ErrorKey: err, diff --git a/internal/vm/vmutils/utils.go b/internal/vm/vmutils/utils.go index f609f975ff..e5b1a51748 100644 --- a/internal/vm/vmutils/utils.go +++ b/internal/vm/vmutils/utils.go @@ -10,6 +10,7 @@ import ( "path/filepath" runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/log" "github.com/containerd/typeurl/v2" @@ -61,3 +62,13 @@ func UnmarshalRuntimeOptions(ctx context.Context, options *anypb.Any) (*runhcsop return shimOpts, nil } + +// IsVMNotAvailableError reports whether err indicates the underlying compute system is +// no longer available for modification — either it has stopped, no longer +// exists, or is in an invalid state for further modifications.. +func IsVMNotAvailableError(err error) bool { + return hcs.IsNotExist(err) || + hcs.IsAlreadyStopped(err) || + hcs.IsAlreadyClosed(err) || + hcs.IsOperationInvalidState(err) +}