Discussion:
memcpy(a,b,CONST) is not inlined by gcc 3.4.1 in Linux kernel
(too old to reply)
Denis Vlasenko
2005-03-29 14:40:09 UTC
Permalink
Try testcase below the sig.

This causes nearly one thousand calls to memcpy in my kernel
(not an allyesconfig one):

# objdump -d vmlinux | grep -F '<memcpy>' | wc -l
959

# gcc -O2 -c t.c
# objdump -r -d t.o

t.o: file format elf32-i386

Disassembly of section .text:

00000000 <f3>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 ec 0c sub $0xc,%esp
6: 6a 03 push $0x3
8: ff 75 0c pushl 0xc(%ebp)
b: ff 75 08 pushl 0x8(%ebp)
e: e8 fc ff ff ff call f <f3+0xf>
f: R_386_PC32 memcpy
13: 83 c4 10 add $0x10,%esp
16: c9 leave
17: c3 ret

00000018 <f3b>:
18: 55 push %ebp
19: 89 e5 mov %esp,%ebp
1b: 8b 55 0c mov 0xc(%ebp),%edx
1e: 66 8b 02 mov (%edx),%ax
21: 8b 4d 08 mov 0x8(%ebp),%ecx
24: 66 89 01 mov %ax,(%ecx)
27: 8a 42 02 mov 0x2(%edx),%al
2a: 88 41 02 mov %al,0x2(%ecx)
2d: c9 leave
2e: c3 ret
2f: 90 nop

00000030 <f3k>:
30: 55 push %ebp
31: 89 e5 mov %esp,%ebp
33: 57 push %edi
34: 56 push %esi
35: 8b 7d 08 mov 0x8(%ebp),%edi
38: 8b 75 0c mov 0xc(%ebp),%esi
3b: b9 ee 02 00 00 mov $0x2ee,%ecx
40: f3 a5 repz movsl %ds:(%esi),%es:(%edi)
42: 5e pop %esi
43: 5f pop %edi
44: c9 leave
45: c3 ret

--
vda

/* The testcase includes no headers, so size_t is declared locally. */
typedef unsigned int size_t;

/*
* Copy n bytes from 'from' to 'to' with x86 string instructions and
* return 'to'.
*
* The asm starts with the count register holding n/4, the destination
* index holding 'to' and the source index holding 'from'; "rep ; movsl"
* then moves n/4 32-bit words. Operand %4 is n itself, and its low two
* bits select the tail: if bit 1 is set a movsw is executed, if bit 0 is
* set a movsb is executed.
*/
static inline void * __memcpy(void * to, const void * from, size_t n)
{
/* Dummy outputs: they tell the compiler the three registers are modified. */
int d0, d1, d2;
__asm__ __volatile__(
"rep ; movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
"1:\ttestb $1,%b4\n\t"
"je 2f\n\t"
"movsb\n"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
: "memory");
return (to);
}

/*
* memcpy variant meant to be called with a compile-time-constant n;
* returns 'to'.
*
* Sizes up to 128 bytes are handed to __builtin_memcpy. Larger sizes use
* "rep ; movsl" for n/4 32-bit words, followed by a tail chosen by n % 4:
* nothing, movsb, movsw, or movsw plus movsb.
*
* This looks horribly ugly, but the compiler can optimize it totally,
* as the count is constant.
*
* NOTE: as reported in the replies to this message, GCC before 4.0 expands
* builtins before constant propagation, so the __builtin_memcpy call below
* sees a variable n and ends up as a real call to memcpy.
*/
static inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
if (n <= 128)
return __builtin_memcpy(to, from, n);

/*
* COMMON(x) expands to the asm statement for the bulk copy with the extra
* instruction text x appended. It relies on d0, d1 and d2 being declared
* in the scope where it is expanded.
*/
#define COMMON(x) \
__asm__ __volatile__( \
"rep ; movsl" \
x \
: "=&c" (d0), "=&D" (d1), "=&S" (d2) \
: "0" (n/4),"1" ((long) to),"2" ((long) from) \
: "memory");
{
/* Dummy outputs referenced by COMMON. */
int d0, d1, d2;
/* Pick the tail instructions from the remainder of n modulo 4. */
switch (n % 4) {
case 0: COMMON(""); return to;
case 1: COMMON("\n\tmovsb"); return to;
case 2: COMMON("\n\tmovsw"); return to;
default: COMMON("\n\tmovsw\n\tmovsb"); return to;
}
}

#undef COMMON
}

/*
* memcpy front end: a size for which __builtin_constant_p(n) is true goes
* to __constant_memcpy, any other size goes to __memcpy.
*/
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
__constant_memcpy((t),(f),(n)) : \
__memcpy((t),(f),(n)))

/*
* 3-byte copy through the memcpy macro; the disassembly in this message
* shows it compiled to a call to memcpy.
* NOTE(review): declared int but has no return statement.
*/
int f3(char *a, char *b) { memcpy(a,b,3); }
/*
* Same 3-byte copy, calling __builtin_memcpy directly; the disassembly in
* this message shows it expanded inline as a 2-byte and a 1-byte move.
* NOTE(review): declared int but has no return statement.
*/
int f3b(char *a, char *b) { __builtin_memcpy(a,b,3); }
/*
* 3000-byte copy through the memcpy macro; the disassembly in this message
* shows it inlined as "repz movsl" with a count of 0x2ee (750 words).
* NOTE(review): declared int but has no return statement.
*/
int f3k(char *a, char *b) { memcpy(a,b,3000); }

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



-------------------------------------------------------------------------------
Achtung: diese Newsgruppe ist eine unidirektional gegatete Mailingliste.
Antworten nur per Mail an die im Reply-To-Header angegebene Adresse.
Fragen zum Gateway -> ***@inka.de.
-------------------------------------------------------------------------------
Andrew Pinski
2005-03-29 15:44:03 UTC
Permalink
Post by Denis Vlasenko
/*
* This looks horribly ugly, but the compiler can optimize it totally,
* as the count is constant.
*/
static inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
if (n <= 128)
return __builtin_memcpy(to, from, n);
The problem is that in GCC < 4.0 there is no constant propagation
pass before expanding builtin functions, so the __builtin_memcpy
call above sees a variable rather than a constant.
Alternatively, changing "size_t n" to "const size_t n" will also fix the issue,
as we do some constant propagation (very little, and only with inlining and
const values) on the trees before expanding the builtin, even before 4.0.0.

-- Pinski
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



-------------------------------------------------------------------------------
Achtung: diese Newsgruppe ist eine unidirektional gegatete Mailingliste.
Antworten nur per Mail an die im Reply-To-Header angegebene Adresse.
Fragen zum Gateway -> ***@inka.de.
-------------------------------------------------------------------------------
Jakub Jelinek
2005-03-29 15:19:53 UTC
Permalink
Post by Denis Vlasenko
typedef unsigned int size_t;
static inline void * __memcpy(void * to, const void * from, size_t n)
{
int d0, d1, d2;
__asm__ __volatile__(
"rep ; movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
"1:\ttestb $1,%b4\n\t"
"je 2f\n\t"
"movsb\n"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
: "memory");
return (to);
}
/*
* This looks horribly ugly, but the compiler can optimize it totally,
* as the count is constant.
*/
static inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
if (n <= 128)
return __builtin_memcpy(to, from, n);
#define COMMON(x) \
__asm__ __volatile__( \
"rep ; movsl" \
x \
: "=&c" (d0), "=&D" (d1), "=&S" (d2) \
: "0" (n/4),"1" ((long) to),"2" ((long) from) \
: "memory");
{
int d0, d1, d2;
switch (n % 4) {
case 0: COMMON(""); return to;
case 1: COMMON("\n\tmovsb"); return to;
case 2: COMMON("\n\tmovsw"); return to;
default: COMMON("\n\tmovsw\n\tmovsb"); return to;
}
}
#undef COMMON
}
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
__constant_memcpy((t),(f),(n)) : \
__memcpy((t),(f),(n)))
int f3(char *a, char *b) { memcpy(a,b,3); }
The problem is that in GCC < 4.0 there is no constant propagation
pass before expanding builtin functions, so the __builtin_memcpy
call above sees a variable rather than a constant.

Either use GCC 4.0+, where this works just fine, or move the
n <= 128 case into the macro:
/*
* memcpy front end with the small-size case moved into the macro: a constant
* n of at most 128 goes straight to __builtin_memcpy, so the builtin is
* given the caller's constant rather than an inline function's parameter.
* Larger constant sizes go to __constant_memcpy, non-constant sizes to
* __memcpy.
*/
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
((n) <= 128 ? __builtin_memcpy((t),(f),(n)) : __constant_memcpy((t),(f),(n))) : \
__memcpy((t),(f),(n)))

Jakub
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



-------------------------------------------------------------------------------
Achtung: diese Newsgruppe ist eine unidirektional gegatete Mailingliste.
Antworten nur per Mail an die im Reply-To-Header angegebene Adresse.
Fragen zum Gateway -> ***@inka.de.
-------------------------------------------------------------------------------
Nathan Sidwell
2005-03-29 15:17:38 UTC
Permalink
Post by Denis Vlasenko
e: e8 fc ff ff ff call f <f3+0xf>
f: R_386_PC32 memcpy
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
__constant_memcpy((t),(f),(n)) : \
__memcpy((t),(f),(n)))
given this #define, how can 'memcpy' appear in the object file? It appears
that something odd is happening with preprocessing. Check the .i files are
as you expect. -dD and -E options will be helpful to you.

nathan
--
Nathan Sidwell :: http://www.codesourcery.com :: CodeSourcery LLC
***@codesourcery.com :: http://www.planetfall.pwp.blueyonder.co.uk

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



-------------------------------------------------------------------------------
Achtung: diese Newsgruppe ist eine unidirektional gegatete Mailingliste.
Antworten nur per Mail an die im Reply-To-Header angegebene Adresse.
Fragen zum Gateway -> ***@inka.de.
-------------------------------------------------------------------------------
Richard Guenther
2005-03-29 15:10:48 UTC
Permalink
Post by Denis Vlasenko
Try testcase below the sig.
This causes nearly one thousand calls to memcpy in my kernel
static inline void * __memcpy(void * to, const void * from, size_t n)
{
int d0, d1, d2;
__asm__ __volatile__(
"rep ; movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
"1:\ttestb $1,%b4\n\t"
"je 2f\n\t"
"movsb\n"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
: "memory");
return (to);
}
The question is, what reason does -Winline give for this inlining decision?
And then, of course, how is the size estimate counted for the above? What kind
of tree node do we get for the ASM expression?

Richard.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



-------------------------------------------------------------------------------
Achtung: diese Newsgruppe ist eine unidirektional gegatete Mailingliste.
Antworten nur per Mail an die im Reply-To-Header angegebene Adresse.
Fragen zum Gateway -> ***@inka.de.
-------------------------------------------------------------------------------
Loading...