forked from kolayne/docker-on-top
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdriver.go
More file actions
439 lines (388 loc) · 18.2 KB
/
driver.go
File metadata and controls
439 lines (388 loc) · 18.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
package main
import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"regexp"
"strings"
"syscall"
"github.com/docker/go-plugins-helpers/volume"
)
// volNameFormat validates volume names. The pattern is modeled after the error
// message the docker daemon emits when asked to create a volume with an invalid name.
var volNameFormat = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_.-]*$`)
// Create handles a volume creation request. It validates the volume name and the
// provided options, verifies that the base directory exists on the host, and then
// creates the volume's directory tree and persists its metadata.
//
// Recognized options:
//
//	base     (required) absolute path to the host directory to work on top of;
//	         must not contain commas or colons, as those are separators in the
//	         overlay mount options string built later
//	volatile (optional) 'true'/'yes' or 'false'/'no' (default 'false')
func (d *DockerOnTop) Create(request *volume.CreateRequest) error {
	log.Debugf("Request Create: Name=%s Options=%s", request.Name, request.Options)

	if !volNameFormat.MatchString(request.Name) {
		log.Debug("Volume name doesn't comply to the regex. Volume not created")
		if strings.ContainsRune(request.Name, '/') {
			// Handle this case separately for a more specific error message
			return errors.New("volume name cannot contain slashes (for specifying host path use " +
				"`-o base=/path/to/base/directory`)")
		}
		return errors.New("volume name contains illegal characters: " +
			"it should comply to \"[a-zA-Z0-9][a-zA-Z0-9_.-]*\"")
	}

	allowedOptions := map[string]bool{"base": true, "volatile": true} // Values are meaningless, only keys matter
	for opt := range request.Options {
		if _, ok := allowedOptions[opt]; !ok {
			log.Debugf("Unknown option %s. Volume not created", opt)
			// Lowercase error string per Go convention (was "Invalid option")
			return errors.New("invalid option " + opt)
		}
	}

	baseDir, ok := request.Options["base"]
	if !ok {
		log.Debug("No `base` option was provided. Volume not created")
		return errors.New("`base` option must be provided and set to an absolute path to the base directory on host")
	}

	if len(baseDir) < 1 || baseDir[0] != '/' {
		log.Debug("`base` is not an absolute path. Volume not created")
		return errors.New("`base` must be an absolute path")
	} else if strings.ContainsRune(baseDir, ',') || strings.ContainsRune(baseDir, ':') {
		// Commas and colons cannot be escaped reliably in the overlay options string,
		// so such paths are rejected outright.
		log.Debug("`base` contains a comma or a colon. Volume not created")
		return errors.New("directories with commas and/or colons in the path are not supported")
	} else {
		// Check that the base directory exists
		f, err := os.Open(baseDir)
		if os.IsNotExist(err) {
			// The base directory does not exist. Note that it doesn't make sense to implicitly create it (as docker
			// does by default with bind mounts), as the point of docker-on-top is to let containers work _on top_ of
			// an existing host directory, so implicitly making an empty one would be pointless.
			log.Debugf("The base directory %s does not exist. Volume not created", baseDir)
			return errors.New("the base directory does not exist")
		} else if err != nil {
			log.Errorf("Failed to open base directory: %v. Volume not created", err)
			return fmt.Errorf("the specified base directory is inaccessible: %w", err)
		} else {
			_ = f.Close()
		}
	}

	var volatile bool
	volatileS, ok := request.Options["volatile"]
	if !ok {
		volatileS = "false"
	}
	// Interpret the value case-insensitively; a switch is clearer than an if/else chain here
	switch strings.ToLower(volatileS) {
	case "no", "false":
		volatile = false
	case "yes", "true":
		volatile = true
	default:
		log.Debug("Option `volatile` has an invalid value. Volume not created")
		return errors.New("option `volatile` must be either 'true', 'false', 'yes', or 'no'")
	}

	if err := d.volumeTreeCreate(request.Name); err != nil {
		if os.IsExist(err) {
			log.Debug("Volume's main directory already exists. New volume not created")
			return errors.New("volume already exists")
		}
		// The error is already logged and wrapped in `internalError` by `d.volumeTreeCreate`
		return err
	}

	if err := d.writeVolumeInfo(request.Name, VolumeInfo{BaseDirPath: baseDir, Volatile: volatile}); err != nil {
		log.Errorf("Failed to write metadata for volume %s: %v. Aborting volume creation (attempting "+
			"to destroy the volume's tree)", request.Name, err)
		_ = d.volumeTreeDestroy(request.Name) // The errors are logged, if any
		return internalError("failed to store metadata for the volume", err)
	}

	return nil
}
// List enumerates all volumes known to the driver by listing the subdirectories
// of the dot root directory; each entry's name is the volume's name.
func (d *DockerOnTop) List() (*volume.ListResponse, error) {
	log.Debug("Request List")

	entries, err := os.ReadDir(d.dotRootDir)
	if err != nil {
		log.Errorf("Failed to list contents of the dot root directory: %v", err)
		return nil, internalError("failed to list contents of the dot root directory", err)
	}

	// Volumes is intentionally left nil when there are no entries (matches the
	// zero value of the response struct).
	var response volume.ListResponse
	for _, entry := range entries {
		response.Volumes = append(response.Volumes, &volume.Volume{Name: entry.Name()})
	}
	return &response, nil
}
// Get reports whether the volume with the requested name exists. Only the name is
// filled in the response, as the driver tracks no extra status for idle volumes.
//
// Note: the implementation does not ensure that `d.dotRootDir + request.Name` is a
// directory. I don't think it's worth checking, though, as under the normal plugin
// operation (with no interference from third parties) only directories are created
// in `d.dotRootDir`.
func (d *DockerOnTop) Get(request *volume.GetRequest) (*volume.GetResponse, error) {
	log.Debugf("Request Get: Name=%s", request.Name)

	// A pure existence check: os.Stat avoids opening (and having to close) a file
	// descriptor, unlike the previous Open+Close approach.
	_, err := os.Stat(d.dotRootDir + request.Name)
	if err == nil {
		log.Debug("Found volume. Listing it (just its name)")
		return &volume.GetResponse{Volume: &volume.Volume{Name: request.Name}}, nil
	} else if os.IsNotExist(err) {
		log.Debug("The requested volume does not exist")
		return nil, errors.New("no such volume")
	} else {
		log.Errorf("Failed to stat the volume's main directory: %v", err)
		return nil, internalError("failed to stat the volume's main directory", err)
	}
}
// Remove deletes a volume. It succeeds regardless of whether the volume exists.
//
// If dockerd sent this request, no containers are using the volume, so under normal
// operation the mountpoint must not exist anymore. If it still exists, recovery is
// attempted (removing it or, if busy, force-unmounting it); if recovery fails, the
// failure is reported and the volume is left in a consistent state (nothing removed).
func (d *DockerOnTop) Remove(request *volume.RemoveRequest) error {
	log.Debugf("Request Remove: Name=%s. It will succeed regardless of the presence of the volume", request.Name)

	mountpoint := d.mountpointdir(request.Name)

	// Try to remove the mountpoint in case it's not mounted
	err := os.Remove(mountpoint)
	if errors.Is(err, syscall.EBUSY) {
		// The mountpoint is busy: an overlay is still mounted there. Try to unmount it.
		err = syscall.Unmount(mountpoint, syscall.MNT_FORCE|syscall.MNT_DETACH)
		if err == nil {
			log.Warningf("Unmounted (force+detach) volume %s, which is being removed", request.Name)
		} else if os.IsNotExist(err) {
			log.Warningf("The mountpoint %s existed and has literally just disappeared, what is going on?",
				mountpoint)
		} else {
			// Fix: the original call was missing the `request.Name` argument for the first verb
			log.Errorf("Volume %s to be removed is still mounted and cannot be unmounted: %v", request.Name, err)
			return internalError("failed to unmount volume mountpoint when removing", err)
		}
	} else if os.IsNotExist(err) {
		// That's the default case: mountpoint does not exist
	} else if err != nil {
		// (the original logged this condition twice with nearly identical messages; once is enough)
		log.Errorf("Volume %s to be removed does not seem mounted but the mountpoint exists and cannot be removed: %v",
			request.Name, err)
		return internalError("failed to remove the mountpoint directory of a volume, which seems unmounted",
			err)
	}

	err = os.RemoveAll(d.dotRootDir + request.Name)
	if err != nil {
		// This potentially leaves the volume directory in an inconsistent state :(
		log.Errorf("Failed to RemoveAll main directory for volume %s: %v", request.Name, err)
		return internalError("failed to RemoveAll volume main directory", err)
	}

	return nil
}
// Path reports the host path at which the volume is (or would be) mounted.
// The mountpoint is computed from the name alone; no existence check is made.
func (d *DockerOnTop) Path(request *volume.PathRequest) (*volume.PathResponse, error) {
	log.Debugf("Request Path: Name=%s", request.Name)
	response := volume.PathResponse{Mountpoint: d.mountpointdir(request.Name)}
	return &response, nil
}
// Mount makes the volume available to the container identified by request.ID and
// returns the mountpoint path on the host.
func (d *DockerOnTop) Mount(request *volume.MountRequest) (*volume.MountResponse, error) {
	log.Debugf("Request Mount: ID=%s, Name=%s", request.ID, request.Name)

	_, err := d.getVolumeInfo(request.Name)
	if os.IsNotExist(err) {
		log.Debugf("Couldn't get volume info: %v", err)
		return nil, errors.New("no such volume")
	} else if err != nil {
		log.Errorf("Failed to retrieve metadata for volume %s: %v", request.Name, err)
		return nil, internalError("failed to retrieve the volume's metadata", err)
	}

	// Synchronization: hold an exclusive lock on the volume's activemounts/ directory
	// for the whole mount operation, not just the initial checks. If the lock were
	// released between discovering that we are the first mounter and actually mounting,
	// a concurrent request could observe the volume as "in use" and wrongly assume the
	// overlay is already mounted — a race condition.
	var dirLock lockedFile
	if err := dirLock.Open(d.activemountsdir(request.Name)); err != nil {
		// The error is already logged and wrapped in `internalError` in lockedFile.go
		return nil, err
	}
	defer dirLock.Close() // A Close error is logged inside Close(); nothing else can be done about it

	if err := d.activateVolume(request.Name, request.ID, dirLock); err != nil {
		return nil, err
	}
	return &volume.MountResponse{Mountpoint: d.mountpointdir(request.Name)}, nil
}
// Unmount releases the volume for the container identified by request.ID,
// unmounting the overlay once no container uses it anymore.
func (d *DockerOnTop) Unmount(request *volume.UnmountRequest) error {
	log.Debugf("Request Unmount: ID=%s, Name=%s", request.ID, request.Name)

	// Synchronization: hold an exclusive lock on the volume's activemounts/ directory
	// so concurrent mounts/unmounts cannot interleave with this operation.
	// For the full reasoning, see the comment at the beginning of `DockerOnTop.Mount`.
	var dirLock lockedFile
	if err := dirLock.Open(d.activemountsdir(request.Name)); err != nil {
		// The error is already logged and wrapped in `internalError` in lockedFile.go
		return err
	}
	defer dirLock.Close() // A Close error is logged inside Close(); nothing else can be done about it

	return d.deactivateVolume(request.Name, request.ID, dirLock)
}
// Capabilities tells the docker daemon that this driver manages volume-scoped
// (as opposed to globally-scoped) volumes.
func (d *DockerOnTop) Capabilities() *volume.CapabilitiesResponse {
	log.Debug("Request Capabilities: plugin discovery")
	response := volume.CapabilitiesResponse{Capabilities: volume.Capability{Scope: "volume"}}
	return &response
}
// =======================================================================================
// | CONCEPTUAL NOTE: an existing file at the active mounts directory is a guarantee of |
// | another container using the volume; the absence of files at the active mounts directory |
// | is a guarantee that no container is using the volume. |
// | |
// | The existence of a mountpoint directory or an overlay mounted on it is a side effect, |
// | which does not guarantee anything, and shall not be relied upon. |
// =======================================================================================
// activateVolume activates a volume: checks if other containers are using it already,
// mounts the overlay if needed, and records this request in the active mounts directory.
//
// The volume must exist, otherwise the function panics.
//
// Parameters:
//
//	volumeName: Name of the volume to be mounted
//	requestId: Unique ID of the mount request
//	activemountsdir: Folder where mounts are tracked (with an exclusive lock taken)
//
// Return:
//
//	err: An error, if encountered
func (d *DockerOnTop) activateVolume(volumeName string, requestId string, activemountsdir lockedFile) error {
	thisVol, err := d.getVolumeInfo(volumeName)
	if err != nil {
		// The caller guarantees the volume exists (see the doc comment), so a failure
		// to read its metadata here is a programmer error.
		panic(err)
	}
	_, err = activemountsdir.ReadDir(1) // Check if there are any files inside activemounts dir
	if errors.Is(err, io.EOF) {
		// No files => no other containers are using the volume. Need to mount the overlay
		lowerdir := thisVol.BaseDirPath
		upperdir := d.upperdir(volumeName)
		workdir := d.workdir(volumeName)
		mountpoint := d.mountpointdir(volumeName)
		err = d.volumeTreePreMount(volumeName, thisVol.Volatile)
		if err != nil {
			// The error is already logged and wrapped in `internalError` by `d.volumeTreePreMount`
			return err
		}
		// Commas/colons in `base` were rejected at creation time, so the options
		// string cannot be corrupted by the path.
		options := "lowerdir=" + lowerdir + ",upperdir=" + upperdir + ",workdir=" + workdir
		err = syscall.Mount("docker-on-top_"+volumeName, mountpoint, "overlay", 0, options)
		if os.IsNotExist(err) {
			log.Errorf("Failed to mount overlay for volume %s because something does not exist: %v",
				volumeName, err)
			return errors.New("failed to mount volume: something is missing (does the base directory exist?)")
		} else if err != nil {
			log.Errorf("Failed to mount overlay for volume %s: %v", volumeName, err)
			return internalError("failed to mount overlay", err)
		}
		log.Debugf("Mounted volume %s at %s", volumeName, mountpoint)
	} else if err == nil {
		log.Debugf("Volume %s is already mounted for some other container. Indicating success without remounting",
			volumeName)
	} else {
		log.Errorf("Failed to list the activemounts directory: %v", err)
		return internalError("failed to list activemounts/", err)
	}
	var activeMountInfo activeMount
	activemountFilePath := d.activemountsdir(volumeName) + requestId
	payload, err := os.ReadFile(activemountFilePath)
	if err == nil {
		// The file may exist from a previous mount when doing a docker cp on an already running
		// container. Thus, no need to mount, just increment the counter.
		err = json.Unmarshal(payload, &activeMountInfo)
		if err != nil {
			log.Errorf("Failed to decode active mount file %s : %v", activemountFilePath, err)
			return internalError("failed to decode active mount file", err)
		}
	} else if os.IsNotExist(err) {
		// Default case, we need to create a new active mount
		activeMountInfo = activeMount{UsageCount: 0}
	} else {
		log.Errorf("Failed to read active mount file %s : %v", activemountFilePath, err)
		return internalError("failed to read active mount file", err)
	}
	activeMountInfo.UsageCount++
	// Here, intentionally separating file creation and writing (instead of using `os.WriteFile`)
	// to perform more careful error handling
	activemountFile, err := os.Create(activemountFilePath)
	if err != nil {
		// We have successfully mounted the overlay but failed to mark that we are using it.
		// If we use the volume now, we break the guarantee that we shall provide according
		// to the conceptual note above (an active mount file must exist for every user).
		// Thus, refusing with an error. We leave the overlay mounted as a harmless side effect.
		log.Errorf("While mounting volume %s, failed to create active mount file: %v", volumeName, err)
		return internalError("failed to create active mount file while mounting volume", err)
	}
	defer activemountFile.Close()
	_, err = activemountFile.Write(activeMountInfo.mustMarshal())
	if err != nil {
		// This is an unfortunate situation. We have either created or truncated the active
		// mounts file, rendering it unreadable on future requests. There is not much we can do.
		log.Errorf("Failed to write to the active mount file %s : %v", activemountFilePath, err)
		return internalError("failed to write to the active mount file", err)
	}
	return nil
}
// deactivateVolume deactivates a volume: checks if other containers are still using it,
// unmounts the overlay if needed, and updates the active mounts bookkeeping.
//
// The volume must exist, otherwise the function panics.
//
// Parameters:
//
//	volumeName: Name of the volume to be unmounted
//	requestId: Unique ID of the unmount request
//	activemountsdir: Folder where mounts are tracked (with an exclusive lock taken)
func (d *DockerOnTop) deactivateVolume(volumeName string, requestId string, activemountsdir lockedFile) error {
	// In accordance with the conceptual note above, we must first remove the file from the active mounts dir,
	// and then attempt to unmount overlay. This ensures that if we crash mid-way, the volume state is consistent:
	// a mounted overlay is a harmless side effect, but an active mount file may only exist if the volume is in use.

	activemountFilePath := d.activemountsdir(volumeName) + requestId
	var activeMountInfo activeMount

	payload, err := os.ReadFile(activemountFilePath)
	if os.IsNotExist(err) {
		log.Warningf("Failed to read&remove %s because it does not exist (but it should...)", activemountFilePath)
		// Assuming we are the only user with this mount ID
		activeMountInfo = activeMount{UsageCount: 1}
	} else if err != nil {
		// Fix: the original log line did not include the error value
		log.Errorf("Failed to read active mounts file %s : %v. The volume %s is now stuck in the active state",
			activemountFilePath, err, volumeName)
		// The user most likely won't see this error message because daemon does not show unmount errors to the
		// `docker run` clients :((
		return internalError("failed to read the active mount file; the volume is now stuck in the active state", err)
	} else {
		err = json.Unmarshal(payload, &activeMountInfo)
		if err != nil {
			log.Errorf("Failed to decode active mount file %s : %v", activemountFilePath, err)
			return internalError("failed to decode active mount file", err)
		}
	}

	activeMountInfo.UsageCount--

	if activeMountInfo.UsageCount == 0 {
		err = os.Remove(activemountFilePath)
		if os.IsNotExist(err) {
			// It's ok, already logged above
		} else if err != nil {
			log.Errorf("Failed to remove active mounts file %s : %v; the volume is now stuck in the active state",
				activemountFilePath, err)
			// The user most likely won't see this error message because the daemon does not show unmount errors
			// to the `docker run` clients :((
			return internalError("failed to remove the active mount file; the volume is now stuck in the active state", err)
		}
	} else {
		err = os.WriteFile(activemountFilePath, activeMountInfo.mustMarshal(), 0o644)
		if err != nil {
			// Fix: the original call passed no arguments for the %s/%v format verbs
			log.Errorf("Failed to write to active mount file %s : %v; the file may have been corrupted",
				activemountFilePath, err)
			return internalError("failed to write to active mount file (potentially corrupting)", err)
		}
		log.Debugf("Volume %s is still used by the same container. Indicating success without unmounting",
			volumeName)
		return nil
	}

	_, err = activemountsdir.ReadDir(1) // Check if there is any container using the volume (after us)
	if errors.Is(err, io.EOF) {
		// No other active mounts: we were the last user, so unmount the overlay
		err = syscall.Unmount(d.mountpointdir(volumeName), 0)
		if err != nil {
			log.Errorf("Failed to unmount %s: %v", d.mountpointdir(volumeName), err)
			return err
		}
		return d.volumeTreePostUnmount(volumeName)
	} else if err == nil {
		log.Debugf("Volume %s is still mounted in another container. Indicating success without unmounting",
			volumeName)
		return nil
	} else {
		log.Errorf("Failed to list the activemounts directory: %v", err)
		return internalError("failed to list activemounts/ ", err)
	}
}