lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 29 Sep 2023 00:04:03 +0800
From:   Kemeng Shi <shikemeng@...weicloud.com>
To:     ritesh.list@...il.com, tytso@....edu, adilger.kernel@...ger.ca
Cc:     ojaswin@...ux.ibm.com, linux-ext4@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: [PATCH v8 08/12] ext4: Separate block bitmap and buddy bitmap freeing in ext4_group_add_blocks()

This patch separates block bitmap and buddy bitmap freeing in order to
update block bitmap with ext4_mb_mark_context in following patch.
The reason why this can be sperated is explained in previous submit.
Put the explanation here to simplify the code archeology to
ext4_group_add_blocks():

Separated freeing is safe with concurrent allocation as long as:
1. Firstly allocate block in buddy bitmap, and then in block bitmap.
2. Firstly free block in block bitmap, and then buddy bitmap.
Then freed block will only be available to allocation when both buddy
bitmap and block bitmap are updated by freeing.
Allocation obeys rule 1 already, just do sperated freeing with rule 2.

Separated freeing has no race with generate_buddy as:
Once ext4_mb_load_buddy_gfp is executed successfully, the update-to-date
buddy page can be found in sbi->s_buddy_cache and no more buddy
initialization of the buddy page will be executed concurrently until
buddy page is unloaded. As we always do free in "load buddy, free,
unload buddy" sequence, separated freeing has no race with generate_buddy.

Signed-off-by: Kemeng Shi <shikemeng@...weicloud.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@...il.com>
---
 fs/ext4/mballoc.c | 54 +++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 767a975be2ca..27fa3b3b931f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6685,35 +6685,39 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 		ext4_warning(sb, "too many blocks added to group %u",
 			     block_group);
 		err = -EINVAL;
-		goto error_return;
+		goto error_out;
+	}
+
+	err = ext4_mb_load_buddy(sb, block_group, &e4b);
+	if (err)
+		goto error_out;
+
+	if (!ext4_sb_block_valid(sb, NULL, block, count)) {
+		ext4_error(sb, "Adding blocks in system zones - "
+			   "Block = %llu, count = %lu",
+			   block, count);
+		err = -EINVAL;
+		goto error_clean;
 	}
 
 	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
 	if (IS_ERR(bitmap_bh)) {
 		err = PTR_ERR(bitmap_bh);
 		bitmap_bh = NULL;
-		goto error_return;
+		goto error_clean;
 	}
 
 	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
 	if (!desc) {
 		err = -EIO;
-		goto error_return;
-	}
-
-	if (!ext4_sb_block_valid(sb, NULL, block, count)) {
-		ext4_error(sb, "Adding blocks in system zones - "
-			   "Block = %llu, count = %lu",
-			   block, count);
-		err = -EINVAL;
-		goto error_return;
+		goto error_clean;
 	}
 
 	BUFFER_TRACE(bitmap_bh, "getting write access");
 	err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
 					    EXT4_JTR_NONE);
 	if (err)
-		goto error_return;
+		goto error_clean;
 
 	/*
 	 * We are about to modify some metadata.  Call the journal APIs
@@ -6723,7 +6727,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 	BUFFER_TRACE(gd_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
 	if (err)
-		goto error_return;
+		goto error_clean;
 
 	for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
 		BUFFER_TRACE(bitmap_bh, "clear bit");
@@ -6736,26 +6740,14 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 		}
 	}
 
-	err = ext4_mb_load_buddy(sb, block_group, &e4b);
-	if (err)
-		goto error_return;
-
-	/*
-	 * need to update group_info->bb_free and bitmap
-	 * with group lock held. generate_buddy look at
-	 * them with group lock_held
-	 */
 	ext4_lock_group(sb, block_group);
 	mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
-	mb_free_blocks(NULL, &e4b, bit, cluster_count);
 	free_clusters_count = clusters_freed +
 		ext4_free_group_clusters(sb, desc);
 	ext4_free_group_clusters_set(sb, desc, free_clusters_count);
 	ext4_block_bitmap_csum_set(sb, desc, bitmap_bh);
 	ext4_group_desc_csum_set(sb, block_group, desc);
 	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeclusters_counter,
-			   clusters_freed);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
@@ -6764,8 +6756,6 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 						  flex_group)->free_clusters);
 	}
 
-	ext4_mb_unload_buddy(&e4b);
-
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -6776,8 +6766,16 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 	if (!err)
 		err = ret;
 
-error_return:
+	ext4_lock_group(sb, block_group);
+	mb_free_blocks(NULL, &e4b, bit, cluster_count);
+	ext4_unlock_group(sb, block_group);
+	percpu_counter_add(&sbi->s_freeclusters_counter,
+			   clusters_freed);
+
+error_clean:
 	brelse(bitmap_bh);
+	ext4_mb_unload_buddy(&e4b);
+error_out:
 	ext4_std_error(sb, err);
 	return err;
 }
-- 
2.30.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ