Skip to content

fuse: make fuse max-write/max-read params tunable #4501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: devel
Choose a base branch
from

Conversation

chen1195585098
Copy link
Contributor

@chen1195585098 chen1195585098 commented Mar 23, 2025

The FUSE driver has supported the max-pages feature since kernel 4.20. To enhance write performance, this patch exports a new mount option, fuse-max-write, to let users specify the maximum number of bytes per FUSE write. The max-pages parameter for the FUSE connection will be adjusted accordingly.

Updates: #4500

@gluster-ant
Copy link
Collaborator

Can one of the admins verify this patch?

1 similar comment
@gluster-ant
Copy link
Collaborator

Can one of the admins verify this patch?

@gluster-ant
Copy link
Collaborator

CLANG-FORMAT FAILURE:
Before merging the patch, this diff needs to be considered for passing clang-format

index 5d852164f..49cb748f8 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -271,8 +271,8 @@ static struct argp_option gf_options[] = {
     {"fuse-handle-copy_file_range", ARGP_FUSE_HANDLE_COPY_FILE_RANGE, "BOOL",
      OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
      "enable the handler of the FUSE_COPY_FILE_RANGE message"},
-    {"fuse-max-write", ARGP_FUSE_MAX_WRITE_KEY, "INTEGER",
-     OPTION_ARG_OPTIONAL, "set fuse max-write bytes"},
+    {"fuse-max-write", ARGP_FUSE_MAX_WRITE_KEY, "INTEGER", OPTION_ARG_OPTIONAL,
+     "set fuse max-write bytes"},
     {0, 0, 0, 0, "Miscellaneous Options:"},
     {
         0,
@@ -1472,18 +1472,17 @@ parse_opts(int key, char *arg, struct argp_state *state)
             break;
         case ARGP_FUSE_MAX_WRITE_KEY:
             if (gf_string2uint32(arg, &cmd_args->fuse_max_write)) {
-                argp_failure(state, -1, 0,
-                            "unknown fuse max-write option %s", arg);
+                argp_failure(state, -1, 0, "unknown fuse max-write option %s",
+                             arg);
             } else if ((cmd_args->fuse_max_write < 4096) ||
                        (cmd_args->fuse_max_write > 1048576)) {
                 argp_failure(state, -1, 0,
-                            "Invalid fuse max-write bytes %s. "
-                            "Valid range: [\"4096, 1048576\"]",
-                            arg);
+                             "Invalid fuse max-write bytes %s. "
+                             "Valid range: [\"4096, 1048576\"]",
+                             arg);
             }
 
             break;
-
     }
     return 0;
 }
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index eda0569dd..8695ce9af 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -63,7 +63,6 @@
 #define O_PATH 010000000 /* from asm-generic/fcntl.h */
 #endif
 
-
 #ifndef EBADFD
 /* Mac OS X does not have EBADFD */
 #define EBADFD EBADF
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 85bfc3d8f..e23595883 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -6356,7 +6356,8 @@ fuse_priv_dump(xlator_t *this)
     if (!this)
         return -1;
 
-    private = this->private;
+   private
+    = this->private;
 
     if (!private)
         return -1;
@@ -6510,7 +6511,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
     glusterfs_graph_t *graph = NULL;
     struct pollfd pfd = {0};
 
-    private = this->private;
+   private
+    = this->private;
 
     graph = data;
 
@@ -6532,7 +6534,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
                 (event == GF_EVENT_CHILD_DOWN)) {
                 pthread_mutex_lock(&private->sync_mutex);
                 {
-                    private->event_recvd = 1;
+                   private
+                    ->event_recvd = 1;
                     pthread_cond_broadcast(&private->sync_cond);
                 }
                 pthread_mutex_unlock(&private->sync_mutex);
@@ -6541,16 +6544,18 @@ notify(xlator_t *this, int32_t event, void *data, ...)
             pthread_mutex_lock(&private->sync_mutex);
             {
                 if (!private->fuse_thread_started) {
-                    private->fuse_thread_started = 1;
+                   private
+                    ->fuse_thread_started = 1;
                     start_thread = _gf_true;
                 }
             }
             pthread_mutex_unlock(&private->sync_mutex);
 
             if (start_thread) {
-                private->fuse_thread = GF_CALLOC(private->reader_thread_count,
-                                                 sizeof(pthread_t),
-                                                 gf_fuse_mt_pthread_t);
+               private
+                ->fuse_thread = GF_CALLOC(private->reader_thread_count,
+                                          sizeof(pthread_t),
+                                          gf_fuse_mt_pthread_t);
                 for (i = 0; i < private->reader_thread_count; i++) {
                     ret = gf_thread_create(&private->fuse_thread[i], NULL,
                                            fuse_thread_proc, this, "fuseproc");
@@ -6584,7 +6589,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
                         if (fuse_get_mount_status(this) != 0) {
                             goto auth_fail_unlock;
                         }
-                        private->mount_finished = _gf_true;
+                       private
+                        ->mount_finished = _gf_true;
                     } else if (pfd.revents) {
                         gf_log(this->name, GF_LOG_ERROR,
                                "mount pipe closed without status");

@chen1195585098 chen1195585098 force-pushed the 1M_IO_request branch 2 times, most recently from 07aebc1 to 631cbc3 Compare March 23, 2025 08:46
@gluster-ant
Copy link
Collaborator

CLANG-FORMAT FAILURE:
Before merging the patch, this diff needs to be considered for passing clang-format

index eda0569dd..8695ce9af 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -63,7 +63,6 @@
 #define O_PATH 010000000 /* from asm-generic/fcntl.h */
 #endif
 
-
 #ifndef EBADFD
 /* Mac OS X does not have EBADFD */
 #define EBADFD EBADF
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 85bfc3d8f..e23595883 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -6356,7 +6356,8 @@ fuse_priv_dump(xlator_t *this)
     if (!this)
         return -1;
 
-    private = this->private;
+   private
+    = this->private;
 
     if (!private)
         return -1;
@@ -6510,7 +6511,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
     glusterfs_graph_t *graph = NULL;
     struct pollfd pfd = {0};
 
-    private = this->private;
+   private
+    = this->private;
 
     graph = data;
 
@@ -6532,7 +6534,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
                 (event == GF_EVENT_CHILD_DOWN)) {
                 pthread_mutex_lock(&private->sync_mutex);
                 {
-                    private->event_recvd = 1;
+                   private
+                    ->event_recvd = 1;
                     pthread_cond_broadcast(&private->sync_cond);
                 }
                 pthread_mutex_unlock(&private->sync_mutex);
@@ -6541,16 +6544,18 @@ notify(xlator_t *this, int32_t event, void *data, ...)
             pthread_mutex_lock(&private->sync_mutex);
             {
                 if (!private->fuse_thread_started) {
-                    private->fuse_thread_started = 1;
+                   private
+                    ->fuse_thread_started = 1;
                     start_thread = _gf_true;
                 }
             }
             pthread_mutex_unlock(&private->sync_mutex);
 
             if (start_thread) {
-                private->fuse_thread = GF_CALLOC(private->reader_thread_count,
-                                                 sizeof(pthread_t),
-                                                 gf_fuse_mt_pthread_t);
+               private
+                ->fuse_thread = GF_CALLOC(private->reader_thread_count,
+                                          sizeof(pthread_t),
+                                          gf_fuse_mt_pthread_t);
                 for (i = 0; i < private->reader_thread_count; i++) {
                     ret = gf_thread_create(&private->fuse_thread[i], NULL,
                                            fuse_thread_proc, this, "fuseproc");
@@ -6584,7 +6589,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
                         if (fuse_get_mount_status(this) != 0) {
                             goto auth_fail_unlock;
                         }
-                        private->mount_finished = _gf_true;
+                       private
+                        ->mount_finished = _gf_true;
                     } else if (pfd.revents) {
                         gf_log(this->name, GF_LOG_ERROR,
                                "mount pipe closed without status");

@ThalesBarretto
Copy link
Contributor

@chen1195585098 Do you mind coming up with a test case or benchmark that displays the expected performance gain, so we can reproduce it?

@@ -6177,7 +6183,7 @@ fuse_thread_proc(void *data)
iov_in[1].iov_base = iobuf->ptr;

iov_in[0].iov_len = msg0_size;
iov_in[1].iov_len = psize;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you still need psize?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you still need psize?

Thanks, indeed there is no longer any need for psize. This draft needs further improvement and is still under development and testing.

I will push my updates as soon as possible.

@chen1195585098
Copy link
Contributor Author

@chen1195585098 Do you mind coming up with a test case or benchmark that displays the expected performance gain, so we can reproduce it?

Yes, more performance data will be posted later; I am working on it.

The FUSE driver has supported the max-pages feature since kernel 4.20.
To enhance write performance, this patch exports two mount options,
fuse-max-write and fuse-max-read, to let users specify the maximum
write/read bytes for the FUSE connection.

By adjusting fuse-max-write/fuse-max-read, the max-pages parameter
for fuse connection will be changed accordingly. It helps to avoid
IO splits when IO size is larger than 128K.

Updates: gluster#4500
Signed-off-by: chenjinhao <[email protected]>
@chen1195585098 chen1195585098 changed the title fuse: make fuse max-write param tunable fuse: make fuse max-write/max-read params tunable May 31, 2025
@gluster-ant
Copy link
Collaborator

CLANG-FORMAT FAILURE:
Before merging the patch, this diff needs to be considered for passing clang-format

index 8deafc7c9..a07305eb2 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1503,7 +1503,6 @@ parse_opts(int key, char *arg, struct argp_state *state)
             }
 
             break;
-
     }
     return 0;
 }
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index 563b15bf5..b6e3db424 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -63,7 +63,6 @@
 #define O_PATH 010000000 /* from asm-generic/fcntl.h */
 #endif
 
-
 #ifndef EBADFD
 /* Mac OS X does not have EBADFD */
 #define EBADFD EBADF
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 9119e51ca..34a04e9ea 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -5158,7 +5158,8 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg,
     if (fini->minor >= 28) {
         fino.max_readahead = priv->fuse_max_read;
         fino.max_write = priv->fuse_max_write;
-        fino.max_pages = max(priv->fuse_max_write, priv->fuse_max_read) / sysconf(_SC_PAGESIZE);
+        fino.max_pages = max(priv->fuse_max_write, priv->fuse_max_read) /
+                         sysconf(_SC_PAGESIZE);
         if (fino.max_pages == FUSE_MAX_MAX_PAGES)
             fino.flags |= FUSE_MAX_PAGES;
     }
@@ -6358,7 +6359,8 @@ fuse_priv_dump(xlator_t *this)
     if (!this)
         return -1;
 
-    private = this->private;
+   private
+    = this->private;
 
     if (!private)
         return -1;
@@ -6512,7 +6514,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
     glusterfs_graph_t *graph = NULL;
     struct pollfd pfd = {0};
 
-    private = this->private;
+   private
+    = this->private;
 
     graph = data;
 
@@ -6534,7 +6537,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
                 (event == GF_EVENT_CHILD_DOWN)) {
                 pthread_mutex_lock(&private->sync_mutex);
                 {
-                    private->event_recvd = 1;
+                   private
+                    ->event_recvd = 1;
                     pthread_cond_broadcast(&private->sync_cond);
                 }
                 pthread_mutex_unlock(&private->sync_mutex);
@@ -6543,16 +6547,18 @@ notify(xlator_t *this, int32_t event, void *data, ...)
             pthread_mutex_lock(&private->sync_mutex);
             {
                 if (!private->fuse_thread_started) {
-                    private->fuse_thread_started = 1;
+                   private
+                    ->fuse_thread_started = 1;
                     start_thread = _gf_true;
                 }
             }
             pthread_mutex_unlock(&private->sync_mutex);
 
             if (start_thread) {
-                private->fuse_thread = GF_CALLOC(private->reader_thread_count,
-                                                 sizeof(pthread_t),
-                                                 gf_fuse_mt_pthread_t);
+               private
+                ->fuse_thread = GF_CALLOC(private->reader_thread_count,
+                                          sizeof(pthread_t),
+                                          gf_fuse_mt_pthread_t);
                 for (i = 0; i < private->reader_thread_count; i++) {
                     ret = gf_thread_create(&private->fuse_thread[i], NULL,
                                            fuse_thread_proc, this, "fuseproc");
@@ -6586,7 +6592,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
                         if (fuse_get_mount_status(this) != 0) {
                             goto auth_fail_unlock;
                         }
-                        private->mount_finished = _gf_true;
+                       private
+                        ->mount_finished = _gf_true;
                     } else if (pfd.revents) {
                         gf_log(this->name, GF_LOG_ERROR,
                                "mount pipe closed without status");
@@ -6921,8 +6928,7 @@ init(xlator_t *this_xl)
     GF_OPTION_INIT("fuse-max-write", priv->fuse_max_write, uint32,
                    cleanup_exit);
 
-    GF_OPTION_INIT("fuse-max-read", priv->fuse_max_read, uint32,
-                   cleanup_exit);
+    GF_OPTION_INIT("fuse-max-read", priv->fuse_max_read, uint32, cleanup_exit);
 
     /* user has set only background-qlen, not congestion-threshold,
        use the fuse kernel driver formula to set congestion. ie, 75% */

@chen1195585098
Copy link
Contributor Author

1M IO performance comparison:

| | avg-seqwrite | avg-seqread | avg-randwrite | avg-randread |
|---|---|---|---|---|
| now (without this patch) | 81.10 MB/s | 746.10 MB/s | 90.41 MB/s | 728.76 MB/s |
| with this patch | 100.93 MB/s | 1260.76 MB/s | 120.10 MB/s | 1178.31 MB/s |
| improvement | +24.45% | +68.98% | +32.84% | +61.69% |

Test Env:

[root@localhost glusterfs]# uname -r
5.10.134-16.2.an8.aarch64
[root@localhost glusterfs]# gluster --version|head -1
glusterfs 11dev

vdbench conf:

hd=default,vdbench=/home/vdbench50407,user=root,shell=ssh,jvms=1

# Vdbench Test Case for Sequential and Random Read/Write Performance
# Block Size: 1M
# Directory: /glusterfs_mnt

# Define the storage configuration
sd=default,openflags=o_direct
sd=sd1,lun=/glusterfs_mnt/testfile,size=10g,threads=64,openflags=o_direct
sd=sd2,lun=/glusterfs_mnt/testfile,size=10g,threads=64,openflags=o_direct
sd=sd3,lun=/glusterfs_mnt/testfile,size=10g,threads=64,openflags=o_direct
sd=sd4,lun=/glusterfs_mnt/testfile,size=10g,threads=64,openflags=o_direct

wd=warmup,sd=sd*,rdpct=0,seekpct=100,xfersize=1m

# Define the workload configuration
wd=default
wd=seqwrite,sd=sd1,rdpct=0,seekpct=0,xfersize=1m
wd=seqread,sd=sd2,rdpct=100,seekpct=0,xfersize=1m
wd=randwrite,sd=sd3,rdpct=0,seekpct=100,xfersize=1m
wd=randread,sd=sd4,rdpct=100,seekpct=100,xfersize=1m

# Define the run configuration
#rd=default,warmup=30,elapsed=150,interval=5
rd=warmup,wd=warmup,iorate=max,threads=64
rd=seqwrite,wd=seqwrite,iorate=max,threads=4
rd=seqread,wd=seqread,iorate=max,threads=64
rd=randwrite,wd=randwrite,iorate=max,threads=4
rd=randread,wd=randread,iorate=max,threads=64

@ThalesBarretto
Copy link
Contributor

looks promising, definitely taking a closer look at this

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants