Skip to content

Commit a9520e6

Browse files
authored
zpool: Add slot power control, print power status
Add `zpool` flags to control the slot power to drives. This assumes your SAS or NVMe enclosure supports slot power control via sysfs.

The new `--power` flag is added to `zpool offline|online|clear`:

    zpool offline --power <pool> <device>    Turn off device slot power
    zpool online --power <pool> <device>     Turn on device slot power
    zpool clear --power <pool> [device]      Turn on device slot power

If the ZPOOL_AUTO_POWER_ON_SLOT env var is set, then the '--power' option is automatically implied for `zpool online` and `zpool clear` and does not need to be passed.

zpool status also gets a --power option to print the slot power status.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Mart Frauenlob <[email protected]>
Signed-off-by: Tony Hutter <[email protected]>
Closes openzfs#15662
1 parent 6930ecb commit a9520e6

File tree

16 files changed

+875
-65
lines changed

16 files changed

+875
-65
lines changed

cmd/zpool/os/freebsd/zpool_vdev_os.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,17 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
124124
{
125125
return (check_file_generic(file, force, isspare));
126126
}
127+
128+
int
129+
zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
130+
{
131+
/* Enclosure slot power not supported on FreeBSD yet */
132+
return (-1);
133+
}
134+
135+
int
136+
zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
137+
{
138+
/* Enclosure slot power not supported on FreeBSD yet */
139+
return (ENOTSUP);
140+
}

cmd/zpool/os/linux/zpool_vdev_os.c

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,3 +416,258 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
416416
{
417417
return (check_file_generic(file, force, isspare));
418418
}
419+
420+
/*
 * Read from a sysfs file and return an allocated string.  Removes
 * the newline from the end of the string if there is one.
 *
 * path: file to read.
 *
 * Returns a NUL-terminated string on success (which must be freed by the
 * caller), or NULL if the file could not be opened, stat'ed or read, or if
 * the buffer could not be allocated.  A file that yields no data results in
 * an empty string, not NULL.
 */
static char *zpool_sysfs_gets(char *path)
{
	int fd;
	struct stat statbuf;
	char *buf = NULL;
	ssize_t count = 0;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return (NULL);

	if (fstat(fd, &statbuf) != 0) {
		close(fd);
		return (NULL);
	}

	/*
	 * Allocate one byte more than the reported size.  calloc() zeroes
	 * the buffer, so the result is always NUL-terminated no matter how
	 * many bytes read() actually returns.
	 */
	buf = calloc(statbuf.st_size + 1, sizeof (*buf));
	if (buf == NULL) {
		close(fd);
		return (NULL);
	}

	/*
	 * Note, we can read less bytes than st_size, and that's ok.  Sysfs
	 * files will report their size is 4k even if they only return a small
	 * string.
	 */
	count = read(fd, buf, statbuf.st_size);
	close(fd);

	if (count < 0) {
		/* Error doing read() */
		free(buf);
		return (NULL);
	}

	/*
	 * Remove trailing newline.  The count check matters: when nothing
	 * was read (count == 0) there is no last character to inspect, and
	 * buf[count - 1] would be an out-of-bounds access.
	 */
	if (count > 0 && buf[count - 1] == '\n')
		buf[count - 1] = 0;

	return (buf);
}
468+
469+
/*
 * Write a string to a sysfs file.
 *
 * path: file to open for writing.
 * str:  NUL-terminated string to write (no newline is appended).
 *
 * Returns:
 *   0 - success
 *  -1 - the file could not be opened for writing
 *  -2 - writing or flushing the data failed
 */
static int zpool_sysfs_puts(char *path, char *str)
{
	FILE *file;

	file = fopen(path, "w");
	if (!file) {
		return (-1);
	}

	if (fputs(str, file) < 0) {
		fclose(file);
		return (-2);
	}

	/*
	 * The stream is buffered, so fputs() normally only copies into the
	 * stdio buffer and the data is actually written out when the stream
	 * is flushed by fclose().  A write that is rejected at that point is
	 * only reported through fclose()'s return value, so it must be
	 * checked or the failure is silently reported as success.
	 */
	if (fclose(file) != 0) {
		return (-2);
	}

	return (0);
}
490+
491+
/* Given a vdev nvlist_t, rescan its enclosure sysfs path */
492+
static void
493+
rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv)
494+
{
495+
update_vdev_config_dev_sysfs_path(vdev_nv,
496+
fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH),
497+
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
498+
}
499+
500+
/*
 * Translate a power string into a numeric state.
 *
 * Recognized strings are "on" and "1" (power on) and "off" and "0"
 * (power off).  The comparison is exact and case sensitive.
 *
 * Returns 1 for an on value, 0 for an off value, and -1 if the string is
 * not one of the recognized values.
 */
static int zpool_power_parse_value(char *str)
{
	int state = -1;

	if (strcmp(str, "on") == 0 || strcmp(str, "1") == 0) {
		state = 1;
	} else if (strcmp(str, "off") == 0 || strcmp(str, "0") == 0) {
		state = 0;
	}

	return (state);
}
514+
515+
/*
516+
* Given a vdev string return an allocated string containing the sysfs path to
517+
* its power control file. Also do a check if the power control file really
518+
* exists and has correct permissions.
519+
*
520+
* Example returned strings:
521+
*
522+
* /sys/class/enclosure/0:0:122:0/10/power_status
523+
* /sys/bus/pci/slots/10/power
524+
*
525+
* Returns allocated string on success (which must be freed), NULL on failure.
526+
*/
527+
static char *
528+
zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev)
529+
{
530+
const char *enc_sysfs_dir = NULL;
531+
char *path = NULL;
532+
nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL);
533+
534+
if (vdev_nv == NULL) {
535+
return (NULL);
536+
}
537+
538+
/* Make sure we're getting the updated enclosure sysfs path */
539+
rescan_vdev_config_dev_sysfs_path(vdev_nv);
540+
541+
if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
542+
&enc_sysfs_dir) != 0) {
543+
return (NULL);
544+
}
545+
546+
if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1)
547+
return (NULL);
548+
549+
if (access(path, W_OK) != 0) {
550+
free(path);
551+
path = NULL;
552+
/* No HDD 'power_control' file, maybe it's NVMe? */
553+
if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) {
554+
return (NULL);
555+
}
556+
557+
if (access(path, R_OK | W_OK) != 0) {
558+
/* Not NVMe either */
559+
free(path);
560+
return (NULL);
561+
}
562+
}
563+
564+
return (path);
565+
}
566+
567+
/*
568+
* Given a path to a sysfs power control file, return B_TRUE if you should use
569+
* "on/off" words to control it, or B_FALSE otherwise ("0/1" to control).
570+
*/
571+
static boolean_t
572+
zpool_power_use_word(char *sysfs_path)
573+
{
574+
if (strcmp(&sysfs_path[strlen(sysfs_path) - strlen("power_status")],
575+
"power_status") == 0) {
576+
return (B_TRUE);
577+
}
578+
return (B_FALSE);
579+
}
580+
581+
/*
582+
* Check the sysfs power control value for a vdev.
583+
*
584+
* Returns:
585+
* 0 - Power is off
586+
* 1 - Power is on
587+
* -1 - Error or unsupported
588+
*/
589+
int
590+
zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
591+
{
592+
char *val;
593+
int rc;
594+
595+
char *path = zpool_power_sysfs_path(zhp, vdev);
596+
if (path == NULL)
597+
return (-1);
598+
599+
val = zpool_sysfs_gets(path);
600+
if (val == NULL) {
601+
free(path);
602+
return (-1);
603+
}
604+
605+
rc = zpool_power_parse_value(val);
606+
free(val);
607+
free(path);
608+
return (rc);
609+
}
610+
611+
/*
612+
* Turn on or off the slot to a device
613+
*
614+
* Device path is the full path to the device (like /dev/sda or /dev/sda1).
615+
*
616+
* Return code:
617+
* 0: Success
618+
* ENOTSUP: Power control not supported for OS
619+
* EBADSLT: Couldn't read current power state
620+
* ENOENT: No sysfs path to power control
621+
* EIO: Couldn't write sysfs power value
622+
* EBADE: Sysfs power value didn't change
623+
*/
624+
int
625+
zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
626+
{
627+
char *sysfs_path;
628+
const char *val;
629+
int rc;
630+
int timeout_ms;
631+
632+
rc = zpool_power_current_state(zhp, vdev);
633+
if (rc == -1) {
634+
return (EBADSLT);
635+
}
636+
637+
/* Already correct value? */
638+
if (rc == (int)turn_on)
639+
return (0);
640+
641+
sysfs_path = zpool_power_sysfs_path(zhp, vdev);
642+
if (sysfs_path == NULL)
643+
return (ENOENT);
644+
645+
if (zpool_power_use_word(sysfs_path)) {
646+
val = turn_on ? "on" : "off";
647+
} else {
648+
val = turn_on ? "1" : "0";
649+
}
650+
651+
rc = zpool_sysfs_puts(sysfs_path, (char *)val);
652+
653+
free(sysfs_path);
654+
if (rc != 0) {
655+
return (EIO);
656+
}
657+
658+
/*
659+
* Wait up to 30 seconds for sysfs power value to change after
660+
* writing it.
661+
*/
662+
timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000);
663+
for (int i = 0; i < MAX(1, timeout_ms / 200); i++) {
664+
rc = zpool_power_current_state(zhp, vdev);
665+
if (rc == (int)turn_on)
666+
return (0); /* success */
667+
668+
fsleep(0.200); /* 200ms */
669+
}
670+
671+
/* sysfs value never changed */
672+
return (EBADE);
673+
}

cmd/zpool/zpool_iter.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,10 @@ for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl)
554554
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
555555
return (1);
556556

557+
/* Make sure we're getting the updated enclosure sysfs path */
558+
update_vdev_config_dev_sysfs_path(nv, path,
559+
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
560+
557561
nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
558562
&vdev_enc_sysfs_path);
559563

0 commit comments

Comments
 (0)