mail archive of the barebox mailing list
 help / color / mirror / Atom feed
From: Ahmad Fatoum <a.fatoum@pengutronix.de>
To: Sascha Hauer <s.hauer@pengutronix.de>,
	"open list:BAREBOX" <barebox@lists.infradead.org>
Subject: Re: [PATCH 05/10] ARM: update findbit.S from Linux
Date: Wed, 25 Sep 2024 18:03:39 +0200	[thread overview]
Message-ID: <a4b682b6-1182-4a87-bf1e-08686fb52d5f@pengutronix.de> (raw)
In-Reply-To: <20240925-arm-assembly-memmove-v1-5-0d92103658a0@pengutronix.de>

On 25.09.24 15:55, Sascha Hauer wrote:
> This updates findbit.S from Linux

Please mention the Linux version this file was taken from in the commit message.

> 
> Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> ---
>  arch/arm/lib32/findbit.S | 243 ++++++++++++++++++-----------------------------
>  1 file changed, 94 insertions(+), 149 deletions(-)
> 
> diff --git a/arch/arm/lib32/findbit.S b/arch/arm/lib32/findbit.S
> index 82a0f34dc2..b7ac2d3c07 100644
> --- a/arch/arm/lib32/findbit.S
> +++ b/arch/arm/lib32/findbit.S
> @@ -1,9 +1,8 @@
>  /* SPDX-License-Identifier: GPL-2.0-only */
> -/* SPDX-FileCopyrightText: 1995-2000 Russell King */
> -
>  /*
> - * Originally from Linux kernel
> - *  arch/arm/lib/findbit.S
> + *  linux/arch/arm/lib/findbit.S
> + *
> + *  Copyright (C) 1995-2000 Russell King
>   *
>   * 16th March 2001 - John Ripley <jripley@sonicblue.com>
>   *   Fixed so that "size" is an exclusive not an inclusive quantity.
> @@ -13,182 +12,128 @@
>   */
>  #include <linux/linkage.h>
>  #include <asm/assembler.h>
> +#include <asm/unwind.h>
>                  .text
>  
> -/*
> - * Purpose  : Find a 'zero' bit
> - * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
> - */
> -ENTRY(_find_first_zero_bit_le)
> -		teq	r1, #0
> -		beq	3f
> -		mov	r2, #0
> -1:
> - ARM(		ldrb	r3, [r0, r2, lsr #3]	)
> - THUMB(		lsr	r3, r2, #3		)
> - THUMB(		ldrb	r3, [r0, r3]		)
> -		eors	r3, r3, #0xff		@ invert bits
> -		bne	.L_found		@ any now set - found zero bit
> -		add	r2, r2, #8		@ next bit pointer
> -2:		cmp	r2, r1			@ any more?
> -		blo	1b
> -3:		mov	r0, r1			@ no free bits
> -		mov	pc, lr
> -ENDPROC(_find_first_zero_bit_le)
> -
> -/*
> - * Purpose  : Find next 'zero' bit
> - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
> - */
> -ENTRY(_find_next_zero_bit_le)
> -		teq	r1, #0
> -		beq	3b
> -		ands	ip, r2, #7
> -		beq	1b			@ If new byte, goto old routine
> - ARM(		ldrb	r3, [r0, r2, lsr #3]	)
> - THUMB(		lsr	r3, r2, #3		)
> - THUMB(		ldrb	r3, [r0, r3]		)
> -		eor	r3, r3, #0xff		@ now looking for a 1 bit
> -		movs	r3, r3, lsr ip		@ shift off unused bits
> -		bne	.L_found
> -		orr	r2, r2, #7		@ if zero, then no bits here
> -		add	r2, r2, #1		@ align bit pointer
> -		b	2b			@ loop for next bit
> -ENDPROC(_find_next_zero_bit_le)
> +#ifdef __ARMEB__
> +#define SWAB_ENDIAN le
> +#else
> +#define SWAB_ENDIAN be
> +#endif
>  
> -/*
> - * Purpose  : Find a 'one' bit
> - * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
> - */
> -ENTRY(_find_first_bit_le)
> +		.macro	find_first, endian, set, name
> +ENTRY(_find_first_\name\()bit_\endian)
> +	UNWIND(	.fnstart)
>  		teq	r1, #0
>  		beq	3f
>  		mov	r2, #0
> -1:
> - ARM(		ldrb	r3, [r0, r2, lsr #3]	)
> - THUMB(		lsr	r3, r2, #3		)
> - THUMB(		ldrb	r3, [r0, r3]		)
> -		movs	r3, r3
> -		bne	.L_found		@ any now set - found zero bit
> -		add	r2, r2, #8		@ next bit pointer
> +1:		ldr	r3, [r0], #4
> +		.ifeq \set
> +		mvns	r3, r3			@ invert/test bits
> +		.else
> +		movs	r3, r3			@ test bits
> +		.endif
> +		.ifc \endian, SWAB_ENDIAN
> +		bne	.L_found_swab
> +		.else
> +		bne	.L_found		@ found the bit?
> +		.endif
> +		add	r2, r2, #32		@ next index
>  2:		cmp	r2, r1			@ any more?
>  		blo	1b
> -3:		mov	r0, r1			@ no free bits
> -		mov	pc, lr
> -ENDPROC(_find_first_bit_le)
> +3:		mov	r0, r1			@ no more bits
> +		ret	lr
> +	UNWIND(	.fnend)
> +ENDPROC(_find_first_\name\()bit_\endian)
> +		.endm
>  
> -/*
> - * Purpose  : Find next 'one' bit
> - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
> - */
> -ENTRY(_find_next_bit_le)
> -		teq	r1, #0
> -		beq	3b
> -		ands	ip, r2, #7
> -		beq	1b			@ If new byte, goto old routine
> - ARM(		ldrb	r3, [r0, r2, lsr #3]	)
> - THUMB(		lsr	r3, r2, #3		)
> - THUMB(		ldrb	r3, [r0, r3]		)
> +		.macro	find_next, endian, set, name
> +ENTRY(_find_next_\name\()bit_\endian)
> +	UNWIND(	.fnstart)
> +		cmp	r2, r1
> +		bhs	3b
> +		mov	ip, r2, lsr #5		@ word index
> +		add	r0, r0, ip, lsl #2
> +		ands	ip, r2, #31		@ bit position
> +		beq	1b
> +		ldr	r3, [r0], #4
> +		.ifeq \set
> +		mvn	r3, r3			@ invert bits
> +		.endif
> +		.ifc \endian, SWAB_ENDIAN
> +		rev_l	r3, ip
> +		.if	.Lrev_l_uses_tmp
> +		@ we need to recompute ip because rev_l will have overwritten
> +		@ it.
> +		and	ip, r2, #31		@ bit position
> +		.endif
> +		.endif
>  		movs	r3, r3, lsr ip		@ shift off unused bits
>  		bne	.L_found
> -		orr	r2, r2, #7		@ if zero, then no bits here
> +		orr	r2, r2, #31		@ no zero bits
>  		add	r2, r2, #1		@ align bit pointer
>  		b	2b			@ loop for next bit
> -ENDPROC(_find_next_bit_le)
> +	UNWIND(	.fnend)
> +ENDPROC(_find_next_\name\()bit_\endian)
> +		.endm
>  
> -#ifdef __ARMEB__
> +		.macro	find_bit, endian, set, name
> +		find_first \endian, \set, \name
> +		find_next  \endian, \set, \name
> +		.endm
>  
> -ENTRY(_find_first_zero_bit_be)
> -		teq	r1, #0
> -		beq	3f
> -		mov	r2, #0
> -1:		eor	r3, r2, #0x18		@ big endian byte ordering
> - ARM(		ldrb	r3, [r0, r3, lsr #3]	)
> - THUMB(		lsr	r3, #3			)
> - THUMB(		ldrb	r3, [r0, r3]		)
> -		eors	r3, r3, #0xff		@ invert bits
> -		bne	.L_found		@ any now set - found zero bit
> -		add	r2, r2, #8		@ next bit pointer
> -2:		cmp	r2, r1			@ any more?
> -		blo	1b
> -3:		mov	r0, r1			@ no free bits
> -		mov	pc, lr
> -ENDPROC(_find_first_zero_bit_be)
> +/* _find_first_zero_bit_le and _find_next_zero_bit_le */
> +		find_bit le, 0, zero_
>  
> -ENTRY(_find_next_zero_bit_be)
> -		teq	r1, #0
> -		beq	3b
> -		ands	ip, r2, #7
> -		beq	1b			@ If new byte, goto old routine
> -		eor	r3, r2, #0x18		@ big endian byte ordering
> - ARM(		ldrb	r3, [r0, r3, lsr #3]	)
> - THUMB(		lsr	r3, #3			)
> - THUMB(		ldrb	r3, [r0, r3]		)
> -		eor	r3, r3, #0xff		@ now looking for a 1 bit
> -		movs	r3, r3, lsr ip		@ shift off unused bits
> -		bne	.L_found
> -		orr	r2, r2, #7		@ if zero, then no bits here
> -		add	r2, r2, #1		@ align bit pointer
> -		b	2b			@ loop for next bit
> -ENDPROC(_find_next_zero_bit_be)
> +/* _find_first_bit_le and _find_next_bit_le */
> +		find_bit le, 1
>  
> -ENTRY(_find_first_bit_be)
> -		teq	r1, #0
> -		beq	3f
> -		mov	r2, #0
> -1:		eor	r3, r2, #0x18		@ big endian byte ordering
> - ARM(		ldrb	r3, [r0, r3, lsr #3]	)
> - THUMB(		lsr	r3, #3			)
> - THUMB(		ldrb	r3, [r0, r3]		)
> -		movs	r3, r3
> -		bne	.L_found		@ any now set - found zero bit
> -		add	r2, r2, #8		@ next bit pointer
> -2:		cmp	r2, r1			@ any more?
> -		blo	1b
> -3:		mov	r0, r1			@ no free bits
> -		mov	pc, lr
> -ENDPROC(_find_first_bit_be)
> +#ifdef __ARMEB__
>  
> -ENTRY(_find_next_bit_be)
> -		teq	r1, #0
> -		beq	3b
> -		ands	ip, r2, #7
> -		beq	1b			@ If new byte, goto old routine
> -		eor	r3, r2, #0x18		@ big endian byte ordering
> - ARM(		ldrb	r3, [r0, r3, lsr #3]	)
> - THUMB(		lsr	r3, #3			)
> - THUMB(		ldrb	r3, [r0, r3]		)
> -		movs	r3, r3, lsr ip		@ shift off unused bits
> -		bne	.L_found
> -		orr	r2, r2, #7		@ if zero, then no bits here
> -		add	r2, r2, #1		@ align bit pointer
> -		b	2b			@ loop for next bit
> -ENDPROC(_find_next_bit_be)
> +/* _find_first_zero_bit_be and _find_next_zero_bit_be */
> +		find_bit be, 0, zero_
> +
> +/* _find_first_bit_be and _find_next_bit_be */
> +		find_bit be, 1
>  
>  #endif
>  
>  /*
>   * One or more bits in the LSB of r3 are assumed to be set.
>   */
> +.L_found_swab:
> +	UNWIND(	.fnstart)
> +		rev_l	r3, ip
>  .L_found:
> -#if __LINUX_ARM_ARCH__ >= 5
> +#if __LINUX_ARM_ARCH__ >= 7
> +		rbit	r3, r3			@ reverse bits
> +		clz	r3, r3			@ count high zero bits
> +		add	r0, r2, r3		@ add offset of first set bit
> +#elif __LINUX_ARM_ARCH__ >= 5
>  		rsb	r0, r3, #0
> -		and	r3, r3, r0
> -		clz	r3, r3
> -		rsb	r3, r3, #31
> -		add	r0, r2, r3
> +		and	r3, r3, r0		@ mask out lowest bit set
> +		clz	r3, r3			@ count high zero bits
> +		rsb	r3, r3, #31		@ offset of first set bit
> +		add	r0, r2, r3		@ add offset of first set bit
>  #else
> -		tst	r3, #0x0f
> +		mov	ip, #~0
> +		tst	r3, ip, lsr #16		@ test bits 0-15
> +		addeq	r2, r2, #16
> +		moveq	r3, r3, lsr #16
> +		tst	r3, #0x00ff
> +		addeq	r2, r2, #8
> +		moveq	r3, r3, lsr #8
> +		tst	r3, #0x000f
>  		addeq	r2, r2, #4
> -		movne	r3, r3, lsl #4
> -		tst	r3, #0x30
> +		moveq	r3, r3, lsr #4
> +		tst	r3, #0x0003
>  		addeq	r2, r2, #2
> -		movne	r3, r3, lsl #2
> -		tst	r3, #0x40
> +		moveq	r3, r3, lsr #2
> +		tst	r3, #0x0001
>  		addeq	r2, r2, #1
>  		mov	r0, r2
>  #endif
>  		cmp	r1, r0			@ Clamp to maxbit
>  		movlo	r0, r1
> -		mov	pc, lr
> -
> +		ret	lr
> +	UNWIND(	.fnend)
> 


-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



  reply	other threads:[~2024-09-25 16:04 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-25 13:55 [PATCH 00/10] ARM: add assembler optimized memmove Sascha Hauer
2024-09-25 13:55 ` [PATCH 01/10] ARM: Use optimized reads[bwl] and writes[bwl] functions Sascha Hauer
2024-09-25 15:45   ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 02/10] ARM: rename logical shift macros push pull into lspush lspull Sascha Hauer
2024-09-25 15:52   ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 03/10] ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ Sascha Hauer
2024-09-25 15:56   ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 04/10] ARM: update lib1funcs.S from Linux Sascha Hauer
2024-09-25 16:02   ` Ahmad Fatoum
2024-09-26  8:22     ` Sascha Hauer
2024-09-26 11:09       ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 05/10] ARM: update findbit.S " Sascha Hauer
2024-09-25 16:03   ` Ahmad Fatoum [this message]
2024-09-25 13:55 ` [PATCH 06/10] ARM: update io-* " Sascha Hauer
2024-09-25 16:04   ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 07/10] ARM: always assume the unified syntax for assembly code Sascha Hauer
2024-09-25 16:09   ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 08/10] ARM: update memcpy.S and memset.S from Linux Sascha Hauer
2024-09-26  5:51   ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 09/10] lib/string.c: export non optimized memmove as __default_memmove Sascha Hauer
2024-09-25 16:10   ` Ahmad Fatoum
2024-09-25 13:55 ` [PATCH 10/10] ARM: add optimized memmove Sascha Hauer
2024-09-26  5:48   ` Ahmad Fatoum
2024-09-26 11:12     ` Sascha Hauer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a4b682b6-1182-4a87-bf1e-08686fb52d5f@pengutronix.de \
    --to=a.fatoum@pengutronix.de \
    --cc=barebox@lists.infradead.org \
    --cc=s.hauer@pengutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.