Best c questions in July 2011

Is 0 a decimal literal or an octal literal?

104 votes

Zero is always zero, so it doesn't matter. But in a recent discussion with my friend he said that octal literals are almost unused today. Then it dawned upon me that actually almost all integer literals in my code are octal, namely 0. Is 0 an octal literal according to the C++ grammar? I'm just curious what the standard says.

Yes, 0 is an Octal literal in C++.

As per the C++ Standard:

2.14.2 Integer literals [lex.icon]

integer-literal:  
    decimal-literal integer-suffix(opt)  
    octal-literal integer-suffix(opt)  
    hexadecimal-literal integer-suffix(opt)  
decimal-literal:  
    nonzero-digit  
    decimal-literal digit  
octal-literal:  
    0                           <--------------------<Here>
    octal-literal octal-digit

Is 'switch' faster than 'if'?

73 votes

Is a switch statement actually faster than an if statement?

I ran the code below on Visual Studio 2010's x64 C++ compiler with the /Ox flag:

#include <stdlib.h>
#include <stdio.h>
#include <time.h>

/* Number of loop iterations each benchmark performs: 2^29. */
#define MAX_COUNT (1 << 29)
/* Global accumulator read and updated by both benchmark loops. */
size_t counter = 0;

/*
 * Benchmark the switch form: run MAX_COUNT iterations that dispatch on
 * (counter % 4 + 1) and add to the global counter.
 * Returns the elapsed clock() time converted to milliseconds.
 */
size_t testSwitch()
{
    clock_t start = clock();
    size_t i;
    for (i = 0; i < MAX_COUNT; i++)
    {
        /* The selector is always in 1..4, so exactly one case runs.
           While counter is a multiple of 4 (it starts at 0), case 1 is
           chosen and adds 4, which keeps it a multiple of 4. */
        switch (counter % 4 + 1)
        {
            case 1: counter += 4; break;
            case 2: counter += 3; break;
            case 3: counter += 2; break;
            case 4: counter += 1; break;
        }
    }
    /* clock ticks -> milliseconds */
    return 1000 * (clock() - start) / CLOCKS_PER_SEC;
}

/*
 * Benchmark the if/else-if form: same work as the switch version, but the
 * selector is stored in a local and tested with a chain of comparisons.
 * Returns the elapsed clock() time converted to milliseconds.
 */
size_t testIf()
{
    clock_t start = clock();
    size_t i;
    for (i = 0; i < MAX_COUNT; i++)
    {
        /* c is always in 1..4, so exactly one branch below is taken */
        const size_t c = counter % 4 + 1;
        if (c == 1) { counter += 4; }
        else if (c == 2) { counter += 3; }
        else if (c == 3) { counter += 2; }
        else if (c == 4) { counter += 1; }
    }
    /* clock ticks -> milliseconds */
    return 1000 * (clock() - start) / CLOCKS_PER_SEC;
}

/*
 * Runs the switch benchmark, then the if benchmark, and prints each
 * elapsed time in milliseconds.
 */
int main()
{
    printf("Starting...\n");
    /* The benchmarks return size_t, which is 64 bits wide on the x64 target,
       so passing it for "%u" mismatches the format. Convert explicitly and
       use the matching "%lu"; millisecond counts fit in unsigned long. */
    printf("Switch statement: %lu ms\n", (unsigned long)testSwitch());
    printf("If     statement: %lu ms\n", (unsigned long)testIf());
    return 0;
}

and got these results:

Switch statement: 5261 ms
If statement: 5196 ms

From what I've learned, switch statements apparently use jump tables to optimize the branching.

Questions:

  1. What would a basic jump table look like, in x86 or x64?

  2. Is this code using a jump table?

  3. Why is there no performance difference in this example? Is there any situation in which there is a significant performance difference?


Disassembly of the code:

testIf:

13FE81B10 sub  rsp,48h 
13FE81B14 call qword ptr [__imp_clock (13FE81128h)] 
13FE81B1A mov  dword ptr [start],eax 
13FE81B1E mov  qword ptr [i],0 
13FE81B27 jmp  testIf+26h (13FE81B36h) 
13FE81B29 mov  rax,qword ptr [i] 
13FE81B2E inc  rax  
13FE81B31 mov  qword ptr [i],rax 
13FE81B36 cmp  qword ptr [i],20000000h 
13FE81B3F jae  testIf+0C3h (13FE81BD3h) 
13FE81B45 xor  edx,edx 
13FE81B47 mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81B4E mov  ecx,4 
13FE81B53 div  rax,rcx 
13FE81B56 mov  rax,rdx 
13FE81B59 inc  rax  
13FE81B5C mov  qword ptr [c],rax 
13FE81B61 cmp  qword ptr [c],1 
13FE81B67 jne  testIf+6Dh (13FE81B7Dh) 
13FE81B69 mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81B70 add  rax,4 
13FE81B74 mov  qword ptr [counter (13FE835D0h)],rax 
13FE81B7B jmp  testIf+0BEh (13FE81BCEh) 
13FE81B7D cmp  qword ptr [c],2 
13FE81B83 jne  testIf+89h (13FE81B99h) 
13FE81B85 mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81B8C add  rax,3 
13FE81B90 mov  qword ptr [counter (13FE835D0h)],rax 
13FE81B97 jmp  testIf+0BEh (13FE81BCEh) 
13FE81B99 cmp  qword ptr [c],3 
13FE81B9F jne  testIf+0A5h (13FE81BB5h) 
13FE81BA1 mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81BA8 add  rax,2 
13FE81BAC mov  qword ptr [counter (13FE835D0h)],rax 
13FE81BB3 jmp  testIf+0BEh (13FE81BCEh) 
13FE81BB5 cmp  qword ptr [c],4 
13FE81BBB jne  testIf+0BEh (13FE81BCEh) 
13FE81BBD mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81BC4 inc  rax  
13FE81BC7 mov  qword ptr [counter (13FE835D0h)],rax 
13FE81BCE jmp  testIf+19h (13FE81B29h) 
13FE81BD3 call qword ptr [__imp_clock (13FE81128h)] 
13FE81BD9 sub  eax,dword ptr [start] 
13FE81BDD imul eax,eax,3E8h 
13FE81BE3 cdq       
13FE81BE4 mov  ecx,3E8h 
13FE81BE9 idiv eax,ecx 
13FE81BEB cdqe      
13FE81BED add  rsp,48h 
13FE81BF1 ret       

testSwitch:

13FE81C00 sub  rsp,48h 
13FE81C04 call qword ptr [__imp_clock (13FE81128h)] 
13FE81C0A mov  dword ptr [start],eax 
13FE81C0E mov  qword ptr [i],0 
13FE81C17 jmp  testSwitch+26h (13FE81C26h) 
13FE81C19 mov  rax,qword ptr [i] 
13FE81C1E inc  rax  
13FE81C21 mov  qword ptr [i],rax 
13FE81C26 cmp  qword ptr [i],20000000h 
13FE81C2F jae  testSwitch+0C5h (13FE81CC5h) 
13FE81C35 xor  edx,edx 
13FE81C37 mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81C3E mov  ecx,4 
13FE81C43 div  rax,rcx 
13FE81C46 mov  rax,rdx 
13FE81C49 inc  rax  
13FE81C4C mov  qword ptr [rsp+30h],rax 
13FE81C51 cmp  qword ptr [rsp+30h],1 
13FE81C57 je   testSwitch+73h (13FE81C73h) 
13FE81C59 cmp  qword ptr [rsp+30h],2 
13FE81C5F je   testSwitch+87h (13FE81C87h) 
13FE81C61 cmp  qword ptr [rsp+30h],3 
13FE81C67 je   testSwitch+9Bh (13FE81C9Bh) 
13FE81C69 cmp  qword ptr [rsp+30h],4 
13FE81C6F je   testSwitch+0AFh (13FE81CAFh) 
13FE81C71 jmp  testSwitch+0C0h (13FE81CC0h) 
13FE81C73 mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81C7A add  rax,4 
13FE81C7E mov  qword ptr [counter (13FE835D0h)],rax 
13FE81C85 jmp  testSwitch+0C0h (13FE81CC0h) 
13FE81C87 mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81C8E add  rax,3 
13FE81C92 mov  qword ptr [counter (13FE835D0h)],rax 
13FE81C99 jmp  testSwitch+0C0h (13FE81CC0h) 
13FE81C9B mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81CA2 add  rax,2 
13FE81CA6 mov  qword ptr [counter (13FE835D0h)],rax 
13FE81CAD jmp  testSwitch+0C0h (13FE81CC0h) 
13FE81CAF mov  rax,qword ptr [counter (13FE835D0h)] 
13FE81CB6 inc  rax  
13FE81CB9 mov  qword ptr [counter (13FE835D0h)],rax 
13FE81CC0 jmp  testSwitch+19h (13FE81C19h) 
13FE81CC5 call qword ptr [__imp_clock (13FE81128h)] 
13FE81CCB sub  eax,dword ptr [start] 
13FE81CCF imul eax,eax,3E8h 
13FE81CD5 cdq       
13FE81CD6 mov  ecx,3E8h 
13FE81CDB idiv eax,ecx 
13FE81CDD cdqe      
13FE81CDF add  rsp,48h 
13FE81CE3 ret       

Update:

Interesting results here and here. Not sure why one is faster and one is slower, though.

There are several optimizations a compiler can make on a switch. I don't think the oft-mentioned "jump-table" is a very useful one though, as it only works when the input can be bounded some way.

C Pseudocode for a "jump table" would be something like this -- note that the compiler in practice would need to insert some form of if test around the table to ensure that the input was valid in the table. Note also that it only works in the specific case that the input is a run of consecutive numbers.

Moreover, on modern CPUs, the cache locality cost of storing the jump table may often be greater than the elided IF tests.

If the number of branches in a switch is extremely large, a compiler can do things like using binary search on the values of the switch, which (in my mind) would be a much more useful optimization, as it does significantly increase performance in some scenarios, is as general as a switch is, and does not result in greater generated code size. But to see that, your test code would need a LOT more branches to see any difference.

To answer your specific questions:

  1. I don't know x86 assembler, sorry. :(
  2. I can say that it is not using a jump table -- 4 comparison instructions are clearly visible:

    13FE81C51 cmp  qword ptr [rsp+30h],1 
    13FE81C57 je   testSwitch+73h (13FE81C73h) 
    13FE81C59 cmp  qword ptr [rsp+30h],2 
    13FE81C5F je   testSwitch+87h (13FE81C87h) 
    13FE81C61 cmp  qword ptr [rsp+30h],3 
    13FE81C67 je   testSwitch+9Bh (13FE81C9Bh) 
    13FE81C69 cmp  qword ptr [rsp+30h],4 
    13FE81C6F je   testSwitch+0AFh (13FE81CAFh) 
    

    A jump table based solution does not use comparison at all.

  3. Either not enough branches to cause the compiler to generate a jump table, or your compiler simply doesn't generate them. I'm not sure which.

With "int i = 1;", why is (i >= 60 * 60 * 1000 / 1 * 1000) true?

31 votes

First, defining two constant expressions without parentheses is my fault:

#define BIG_INTERVAL 60 * 60 * 1000
#define SMALL_INTERVAL 1 * 1000

int i = 1;

if (i >= BIG_INTERVAL / SMALL_INTERVAL - 1)
{
    printf("Oops!\n");
}

The if statement after the macro expansion is if(i >= 60 * 60 * 1000 / 1 * 1000 - 1).

That is not my intention. But I find something strange if I write if (i >= 3600000000 - 1). It is false.

What type is 60 * 60 * 1000 / 1 * 1000 - 1 ? int?

All operators on ints return int. So yes, 60 * 60 * 1000 / 1 * 1000 - 1 is an int. But the expected result of 3599999999 is too big for an int, so the expression actually evaluates to -694967297 (assuming 32-bit int and two's complement).

This doesn't happen with a literal 3600000000 because integer literals larger than INT_MAX are of a type that can hold the full value.

What is the purpose of the unary '+' operator in C?

Asked on Sat, 09 Jul 2011 by zneak c
29 votes

In C, it's legal to write something like:

int foo = +4;

However, as far as I can tell, it's a no-op. Is it?

As per the C90 standard in 6.3.3.3:

The result of the unary + operator is the value of its operand. The integral promotion is performed on the operand, and the result has the promoted type.

and

The operand of the unary + or - operator shall have arithmetic type.

Emulate "double" using 2 "float"s

22 votes

I am writing a program for embedded hardware that only supports 32-bit single-precision floating point arithmetic. The algorithm I am implementing, however, requires 64-bit double-precision addition and comparison. I am trying to emulate the "double" datatype using a tuple of two floats. So a double d will be emulated as a struct containing the tuple: (float d.hi, float d.low).

The comparison should be straightforward using a lexicographic ordering. The addition, however, is a bit tricky because I am not sure which base I should use. Should it be FLT_MAX? And how can I detect a carry?

How can this be done?


Edit (Clarity): I need the extra significant digits rather than the extra range.

double-float is a technique that uses pairs of single-precision numbers to achieve almost twice the precision of single precision arithmetic accompanied by a slight reduction of the single precision exponent range (due to intermediate underflow and overflow at the far ends of the range). The basic algorithms were developed by T.J. Dekker and William Kahan in the 1970s. Below I list two fairly recent papers that show how these techniques can be adapted to GPUs, however much of the material covered in these papers is applicable independent of platform so should be useful for the task at hand.

http://hal.archives-ouvertes.fr/docs/00/06/33/56/PDF/float-float.pdf Guillaume Da Graça, David Defour Implementation of float-float operators on graphics hardware, 7th conference on Real Numbers and Computers, RNC7

http://andrewthall.org/papers/df64_qf128.pdf Andrew Thall Extended-Precision Floating-Point Numbers for GPU Computation.

Race condition on x86

17 votes

Could someone explain this statement:

shared variables
x = 0, y = 0

Core 1       Core 2
x = 1;       y = 1;
r1 = y;      r2 = x;

How is it possible to have r1 == 0 and r2 == 0 on x86 processors?

Source "The Language of Concurrency" by Bartosz Milewski.

The problem can arise due to optimizations involving reordering of instructions. In other words, both processors can assign r1 and r2 before assigning variables x and y, if they find that this would yield better performance.

This can be solved by adding a memory barrier, which would enforce the ordering constraint.

[Edit] To quote the slideshow you mentioned in your post:

Modern multicores/languages break sequential consistency.

[Edit2] Regarding the x86 architecture, the best resource to read is Intel® 64 and IA-32 Architectures Software Developer’s Manual (Chapter 8.2 Memory Ordering). Sections 8.2.1 and 8.2.2 describe the memory-ordering implemented by Intel486, Pentium, Intel Core 2 Duo, Intel Atom, Intel Core Duo, Pentium 4, Intel Xeon, and P6 family processors: a memory model called processor ordering, as opposed to program ordering (strong ordering) of the older Intel386 architecture (where read and write instructions were always issued in the order they appeared in the instruction stream).

The manual describes many ordering guarantees of the processor ordering memory model (such as Loads are not reordered with other loads, Stores are not reordered with other stores, Stores are not reordered with older loads etc.), but it also describes the allowed reordering rule which causes the race condition in the OP's post:

8.2.3.4 Loads May Be Reordered with Earlier Stores to Different Locations

On the other hand, if the original order of the instructions was switched:

shared variables
x = 0, y = 0

Core 1       Core 2
r1 = y;      r2 = x;
x = 1;       y = 1;

The processor guarantees that the outcome r1 = 1 and r2 = 1 is not allowed (due to the guarantee in 8.2.3.3, Stores Are Not Reordered With Earlier Loads), meaning that those instructions would never be reordered within an individual core.

To compare this with different architectures, check out this article: Memory Ordering in Modern Microprocessors (this image specifically). You can see that Itanium (IA-64) does even more reordering than the IA-32 architecture.

What does an expression like arr["hi there"] imply?

17 votes

If a=3 and b=5 what does this imply?

printf(&a["Ya!Hello! how is this? %s\n"], &b["junk/super"]);

I know that arr[4] means *(arr+4) so I need to know what does an expression like "hi there" imply?

EDIT - Question in probably clearer terms:

When a string is used as an array subscript what value does it convey ?

Why is output of above Hello! how is this? super ?

That implies, the printf becomes equivalent to this:

printf("Hello! how is this? %s\n", "super");

which will print:

Hello! how is this? super

Online demo : http://ideone.com/PVzUP

Explanation:

When we write char s[]="nawaz"; then s[2] means the 3rd character in the string s. We can express this by writing "nawaz"[2], which also means the 3rd character in the string "nawaz". We can also write 2["nawaz"], which also means the 3rd character in the string. In your code, the printf uses the last form, i.e. the form 2["nawaz"]. It's unusual, though.

So a["Ya!Hello! how is this? %s\n"] means the 4th character in the string (as the value of a is 3), and if you add & in front of it then &a["Ya!Hello! how is this? %s\n"] returns the address of the 4th character in the string; that is, in the printf it becomes equivalent to this:

Hello! how is this? %s\n

And I hope you can interpret the rest yourself.

How are static arrays stored in Java memory?

16 votes

So in a language like C, memory is separated into 5 different parts: OS Kernel, text segment, static memory, dynamic memory, and the stack. Something like this:

Memory Layout

If we declared a static array in C, we had to specify its size beforehand, and after that it would be fixed forevermore. The program would allocate enough memory for the array and stick it in the static data segment as expected.

However I noticed that in Java, you could do something like this:

/**
 * Demonstrates that a static array field can be re-assigned to a newly
 * allocated array of a different length.
 */
public class Test {
        // static field holding a reference to a one-element int array
        static int[] a = new int[1];

        public static void main( String[] args ) {
                // re-point the static field at a new two-element array
                a = new int[2];
        }
} 

and everything would work as you'd expect. My question is, why does this work in Java?

EDIT: So the consensus is that an int[] in Java acts more like an int* in C. So as a follow-up question, is there any way to allocate arrays in static memory in Java (if not, why not)? Wouldn't this provide quicker access to such arrays? EDIT2: ^ this is in a new question now: Where are static class variables stored in memory?

In Java, any time you use the word new, memory for that object is allocated on the heap and a reference is returned. This is also true for arrays. The int[] a is just a reference to new int[1]. When you do new int[2], a new array is allocated and a is pointed to it. The old array will be garbage collected when needed.

Understanding C built-in library function implementations

Asked on Mon, 11 Jul 2011 by saint c
16 votes

So I was going through K&R second edition doing the exercises. Feeling pretty confident after doing few exercises I thought I'd check the actual implementations of these functions. It was then my confidence fled the scene. I could not understand any of it.

For example I check the getchar():

Here is the prototype in libio/stdio.h

extern int getchar (void);

So I followed it through and got this:

/* Inline definition of getchar(): reads from stdin by delegating to
   _IO_getc. */
__STDIO_INLINE int
getchar (void)
{
  return _IO_getc (stdin);
}

Again I follow it to the libio/getc.c:

/* Read one character from FP: calls _IO_getc_unlocked between
   _IO_acquire_lock and _IO_release_lock and returns its result.
   The parameter uses an old-style (K&R) declaration.  */
int
_IO_getc (fp)
     FILE *fp;
{
  int result;
  /* NOTE(review): CHECK_FILE is not shown here; presumably it validates FP
     and makes the function return EOF on failure -- confirm.  */
  CHECK_FILE (fp, EOF);
  _IO_acquire_lock (fp);
  result = _IO_getc_unlocked (fp);
  _IO_release_lock (fp);
  return result;
}

And I'm taken to another header file libio/libio.h, which is pretty cryptic:

/* Expands to __uflow (_fp) when _IO_read_ptr has reached _IO_read_end;
   otherwise yields the byte at _IO_read_ptr (read as unsigned char) and
   post-increments that pointer.  The comparison is wrapped in _IO_BE with
   a second argument of 0.  */
#define _IO_getc_unlocked(_fp) \
       (_IO_BE ((_fp)->_IO_read_ptr >= (_fp)->_IO_read_end, 0) \
    ? __uflow (_fp) : *(unsigned char *) (_fp)->_IO_read_ptr++)

Which is where I finally ended my journey.

My question is pretty broad. What does all this mean? I could not for the life of me figure out anything logical from looking at the code. It looks like a bunch of code abstracted away layer after layer.

More importantly, when does it really get the character from stdin?

_IO_getc_unlocked is an inlinable macro. The idea is that you can get a character from the stream without having to call a function, making it hopefully fast enough to use in tight loops, etc.

Let's take it apart one layer at a time. First, what is _IO_BE?

/usr/include/libio.h:# define _IO_BE(expr, res) __builtin_expect ((expr), res)

_IO_BE is a hint to the compiler, that expr will usually evaluate to res. It's used to structure code flow to be faster when the expectation is true, but has no other semantic effect. So we can get rid of that, leaving us with:

/* _IO_getc_unlocked with the _IO_BE wrapper removed: calls __uflow(_fp)
   when _IO_read_ptr has reached _IO_read_end, otherwise yields the byte at
   _IO_read_ptr (as unsigned char) and post-increments the pointer.
   Parentheses are balanced so the macro expands to a single expression. */
#define _IO_getc_unlocked(_fp) \
  ( ( (_fp)->_IO_read_ptr >= (_fp)->_IO_read_end ) \
    ? __uflow(_fp) : *(unsigned char *)(_fp)->_IO_read_ptr++ )

Let's turn this into an inline function for clarity:

/*
 * Function form of the _IO_getc_unlocked macro, written out for clarity.
 * fp: stream to read from.
 * Returns the result of __uflow(fp) when the read pointer has reached the
 * end of the buffer; otherwise returns the byte at the read pointer (as
 * unsigned char) and advances the pointer by one.
 */
inline int _IO_getc_unlocked(FILE *fp) {
  /* the parameter is named fp, so the body must refer to fp (not _fp) */
  if (fp->_IO_read_ptr >= fp->_IO_read_end)
    return __uflow(fp);
  else
    return *(unsigned char *)(fp->_IO_read_ptr++);
}

In short, we have a pointer into a buffer, and a pointer to the end of the buffer. We check if the pointer is outside the buffer; if not, we increment it and return whatever character was at the old value. Otherwise we call __uflow to refill the buffer and return the newly read character.

As such, this allows us to avoid the overhead of a function call until we actually need to do IO to refill the input buffer.

Keep in mind that standard library functions can be complicated like this; they can also use extensions to the C language (such as __builtin_expect) that are NOT standard and may NOT work on all compilers. They do this because they need to be fast, and because they can make assumptions about what compiler they're using. Generally speaking your own code should not use such extensions unless absolutely necessary, as it'll make porting to other platforms more difficult.

Naming Conflict in C++: How to access a struct member called "class"

15 votes

I came across a naming problem while working with the xlib library:

I'm using a struct which has a member called "class". I assume this library is mostly used in plain C programs. So there's no problem.

But I'm programming in C++ and here the name "class" is a keyword and cannot be used to denote variables. So, if I'm accessing the struct via

myvariable = mystruct->class;

I'm getting the error:

expected unqualified-id before ‘class’

Given that I cannot change the struct itself, how can I access this struct member despite the naming conflict?

You say that you're using XLib. I can only find two places in my Xlib.h where class is used as a structure member: Visual and XWindowAttributes. In both cases, the offending member is wrapped like this:

#if defined(__cplusplus) || defined(c_plusplus)
    int c_class;
#else
    int class;
#endif

Similar hackery appears in XColormapEvent to take care of the new member.

So you should be fine unless your C++ compiler isn't defining any of the necessary macros; but that would also break the usual extern "C" { ... } wrappers as well so the problem is most likely elsewhere. If you're using a struct that isn't part of the standard XLib then you should apply the above hack by hand and have a stern discussion with the library's author (and if that's you then angrily talk to yourself for a bit and we'll pretend not to listen).

If you are having trouble with the XLib structs, then try using the C++ version of the member names:

myvariable = mystruct->c_class;
mynew      = ev->c_new;

In C or C++, is there a directive similar to #ifndef for typedefs?

14 votes

If I want to define a value only if it is not defined, I do something like this :

#ifndef THING
#define THING OTHER_THING
#endif

What if THING is a typedef'd identifier, and not defined? I would like to do something like this:

#ifntypedef thing_type
typedef uint32_t thing_type
#endif

The issue arose because I wanted to check to see if an external library has already defined the boolean type, but I'd be open to hearing a more general solution.

No, there is no such facility in C++ at the preprocessing stage. The most you can do is

#ifndef thing_type
#define thing_type uint32_t 
#endif

Though this is not a good coding practice and I don't suggest it.

Error reporting in a C library

7 votes

I am looking for a robust way to report errors in a C library. Consider the simple example of a queue:

struct queue *q = malloc(sizeof(*q));
if (NULL == q) {
    /* malloc failed. now what ? */
    return NULL; /* maybe ? */
}

Okay, so for that example returning NULL isn't otherwise valid so it makes sense to return it to signal an error. But

void *get_data()
{
    /* stuff */

    /* Error detected. NULL is a valid return, now what ? */

    /* stuff */
}

What's more, once we signal an error, how to signal what is the error ? I have thought about it and don't have a satisfying solution.

  • Using errno or some other global object isn't something I would like to do (perhaps the functions may be called from multiple threads etc).

  • I thought of making the client supply some "status" object that can be inspected after the call, but that would make the API quite ugly.

So what's your take on the subject ? How do you report errors in a clean way ?

int get_data(void **ptr)

If there are no obvious 'error returns', then maybe your output value should not be the return value. The error could either be an errno, some other custom detailed error value (*cough* HRESULT), just true/false for if the function succeeded, or some other bit of useful information (the length of the data, or -1 if error)

Manipulating "sticky bit" within a C program

7 votes

How do we set, reset and check the "sticky bit" from within a C program?

Thanks

To read the sticky bit, use stat() and check .st_mode for S_ISVTX:

#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>

struct stat file_stats;
stat("my_file", &file_stats);
if (file_stats.st_mode & S_ISVTX)
    printf("sticky\n");

to reset it, you do it via chmod

struct stat file_stats;
stat("my_file", &file_stats);
mode_t new_mode = file_stats.st_mode & ~S_ISVTX;
chmod("my_file", new_mode);

to set it, chmod it is

struct stat file_stats;
stat("my_file", &file_stats);
mode_t new_mode = file_stats.st_mode | S_ISVTX;
chmod("my_file", new_mode);

this code is untested.

man pages: stat(2) chmod(2)

perl process gets stuck with «*** glibc detected *** perl: corrupted double-linked list: 0x0000000001474b40 ***» - how can I make it terminate?

7 votes

I've been trying to debug a memory corruption with perl 5 and XML::LibXML (which I now maintain). Now here's what I'm getting:

shlomif@lap:~/progs/perl/cpan/XML/LibXML/bugs/perl-xml-libxml-bugs/XML-LibXML-reader-segfault$ make test
XML_CATALOG_FILES="`pwd`/sgml-lib/catalog.xml" perl find_ascii_quotes.pl index.html
*** glibc detected *** perl: corrupted double-linked list: 0x0000000001474b40 ***
^Cmake: *** [test] Interrupt

And then the perl process gets stuck and won't return to the shell. Thing is, the fact that the perl process gets halted interrupts my flow and I'm looking for a way for the process to just crash or whatever instead of hanging there. gdb reports the stuff below. One can find the code in question in its Mercurial repository (just run "make test"), and I'm on Mandriva Linux Cooker on x86-64.

Any help will be appreciated.

ader-segfault$ gdb --command=cmds.gdb /usr/bin/perl
GNU gdb (GDB) 7.1-5 (Mandriva Linux release 2011.0)
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-mandriva-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /usr/bin/perl...
warning: the debug information found in "/usr/lib/debug//usr/bin/perl5.12.3.debug" does not match "/usr/bin/perl" (CRC mismatch).


warning: the debug information found in "/usr/lib/debug/usr/bin/perl5.12.3.debug" does not match "/usr/bin/perl" (CRC mismatch).

(no debugging symbols found)...done.
warning: the debug information found in "/usr/lib/debug//usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so" (CRC mismatch).

[Thread debugging using libthread_db enabled]
warning: the debug information found in "/usr/lib/debug//usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Encode/Encode.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Encode/Encode.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Encode/Encode.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Encode/Encode.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug//usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Data/Dumper/Dumper.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Data/Dumper/Dumper.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Data/Dumper/Dumper.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/Data/Dumper/Dumper.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug//usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/IO/IO.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/IO/IO.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/IO/IO.so.debug" does not match "/usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/auto/IO/IO.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug//usr/lib/perl5/vendor_perl/5.12.3/x86_64-linux-thread-multi/auto/XML/LibXML/LibXML.so.debug" does not match "/usr/lib/perl5/vendor_perl/5.12.3/x86_64-linux-thread-multi/auto/XML/LibXML/LibXML.so" (CRC mismatch).

warning: the debug information found in "/usr/lib/debug/usr/lib/perl5/vendor_perl/5.12.3/x86_64-linux-thread-multi/auto/XML/LibXML/LibXML.so.debug" does not match "/usr/lib/perl5/vendor_perl/5.12.3/x86_64-linux-thread-multi/auto/XML/LibXML/LibXML.so" (CRC mismatch).

*** glibc detected *** /usr/bin/perl: corrupted double-linked list: 0x0000000000b83440 ***
^C
Program received signal SIGINT, Interrupt.
0x00007ffff6abe12e in __lll_lock_wait_private () from /lib64/libc.so.6
Missing debug package(s), you should install: perl-debug-5.12.3-8.x86_64
(gdb) bt
#0  0x00007ffff6abe12e in __lll_lock_wait_private () from /lib64/libc.so.6
#1  0x00007ffff6a4f7a1 in _L_lock_9854 () from /lib64/libc.so.6
#2  0x00007ffff6a4d6eb in malloc () from /lib64/libc.so.6
#3  0x00007ffff6a43d6d in __libc_message () from /lib64/libc.so.6
#4  0x00007ffff6a49bfa in malloc_printerr () from /lib64/libc.so.6
#5  0x00007ffff6a49f38 in malloc_consolidate.part.3 () from /lib64/libc.so.6
#6  0x00007ffff6a4a749 in _int_free () from /lib64/libc.so.6
#7  0x00007ffff4e8b0e0 in xmlHashFree__internal_alias (table=0xb2db40, 
    f=0x7ffff4e98e00 <xmlFreeAttribute>) at hash.c:324
#8  0x00007ffff4e82e42 in xmlFreeDtd__internal_alias (cur=0x633310)
    at tree.c:1126
#9  0x00007ffff4e8259a in xmlFreeDoc__internal_alias (cur=0x635100)
    at tree.c:1227
#10 0x00007ffff51e75a5 in PmmREFCNT_dec ()
   from /usr/lib/perl5/vendor_perl/5.12.3/x86_64-linux-thread-multi/auto/XML/LibXML/LibXML.so
#11 0x00007ffff51c4f65 in XS_XML__LibXML__Node_DESTROY ()
   from /usr/lib/perl5/vendor_perl/5.12.3/x86_64-linux-thread-multi/auto/XML/LibXML/LibXML.so
#12 0x00007ffff7b1d60d in Perl_pp_entersub ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#13 0x00007ffff7ab9242 in Perl_call_sv ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
---Type <return> to continue, or q <return> to quit---
#14 0x00007ffff7b22cfa in Perl_sv_clear ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#15 0x00007ffff7b23552 in Perl_sv_free2 ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#16 0x00007ffff7b2314d in Perl_sv_clear ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#17 0x00007ffff7b23552 in Perl_sv_free2 ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#18 0x00007ffff51a6ad9 in XS_XML__LibXML__Reader__DESTROY ()
   from /usr/lib/perl5/vendor_perl/5.12.3/x86_64-linux-thread-multi/auto/XML/LibXML/LibXML.so
#19 0x00007ffff7b1d60d in Perl_pp_entersub ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#20 0x00007ffff7b14d70 in Perl_runops_standard ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#21 0x00007ffff7ab9083 in Perl_call_sv ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#22 0x00007ffff7b22cfa in Perl_sv_clear ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#23 0x00007ffff7b23552 in Perl_sv_free2 ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#24 0x00007ffff7b4a45f in Perl_leave_scope ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
---Type <return> to continue, or q <return> to quit---
#25 0x00007ffff7b15ad2 in Perl_pp_unstack ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#26 0x00007ffff7b14d70 in Perl_runops_standard ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#27 0x00007ffff7abed6e in perl_run ()
   from /usr/lib/perl5/5.12.3/x86_64-linux-thread-multi/CORE/libperl.so
#28 0x0000000000400e29 in main ()
(gdb) 

You can control the behaviour of glibc's memory checking with the MALLOC_CHECK_ environment variable. If you set this to '3' then it will print a message and abort() on any detected error.

See glibc's heap consistency checking documentation for more information - this might actually help you debug the error, as enabling MALLOC_CHECK_ will perform consistency checks more often than it does by default (and hence catch the error sooner).

You may also want to look into Valgrind (if you haven't already) to assist in finding your corruption issue.

c preprocessor macro ^ symbol?

7 votes

looking at this page: http://www.mikeash.com/pyblog/friday-qa-2010-12-31-c-macro-tips-and-tricks.html

I found this snippet of code with ^{ ... }() syntax, what are the caret/brackets doing?

/*
 * MAX(x, y): expands to a block literal (^{ ... }) that is invoked
 * immediately by the trailing (). Each argument is copied once into an int
 * local, so x and y are each evaluated a single time (and converted to int),
 * and the larger copy is returned as the value of the whole expression.
 * NOTE(review): the ^{ } block syntax is a compiler extension, not standard
 * C -- confirm the toolchain in use supports it.
 */
#define MAX(x, y) (^{ \
    int my_localx = (x); \
    int my_localy = (y); \
    return my_localx > my_localy ? (my_localx) : (my_localy); \
}())

It looks like it's creating an anonymous function or something. What is this concept called? Where can I read about it?

It's a C block. It's quite like an anonymous function (in use, not in structure). You can read more about them on Mike Ash's site and in Apple's documentation.

Hide password input on terminal

6 votes

I want to mask my password with '*' while it is being typed. I use GCC on Linux for this code. I know one solution is to use the getch() function, like this:

#include <conio.h>   /* non-standard header that declares getch(); not shipped with GCC on Linux */
#include <stdio.h>

/*
 * Reads a password one key at a time with getch() and prints a '*' for
 * every character accepted, so the typed text itself is never shown.
 * Input stops at end of line, on a negative (error/EOF) return from getch(),
 * or when the buffer is full; the result is always NUL-terminated.
 * Returns 0 on completion.
 */
int main()
{
    char password[10];
    int c;                  /* int, not char: getch() returns int */
    int i = 0;              /* next free slot in password[] */

    /* Leave one byte for the terminating NUL. No ';' after the condition:
       the braces below must be the loop body. */
    while (i < (int)sizeof(password) - 1) {
        c = getch();
        /* NOTE(review): conio-style getch() is expected to report Enter as
           '\r' rather than '\n'; both are accepted -- confirm on the target. */
        if (c == '\n' || c == '\r' || c < 0)
            break;
        password[i] = (char)c;
        printf("*");
        i++;
    }
    password[i] = '\0';

    return 0;
}

But the problem is that GCC does not include the "conio.h" header, so getch() is useless for me. Please give me a solution. Thanks in advance.

Hmm, in the Linux world masking isn't usually done with asterisks; normally echoing is just turned off and the terminal displays blanks, e.g. when you use su or log into a virtual terminal.

There is a library function to handle getting passwords. It won't mask the password with asterisks, but it will disable echoing of the password to the terminal. I pulled this out of a Linux book I have; I believe it's part of the POSIX standard.

#include <unistd.h>
char *getpass(const char *prompt);

/*
 * getpass(): prints the string pointed to by prompt, reads one line of input
 * with terminal echo disabled, and returns that line as a null-terminated
 * string with the trailing newline stripped.
 * Returns a pointer to a statically allocated input password string on
 * success, or NULL on error.
 */

The getpass() function first disables echoing and all processing of terminal special characters (such as the interrupt character, normally Control-C).

It then prints the string pointed to by prompt, and reads a line of input, returning the null-terminated input string with the trailing newline stripped, as its function result.

A Google search for getpass() turns up a reference to the GNU implementation (it should be in most Linux distros) and some sample code for implementing your own if need be:

http://www.gnu.org/s/hello/manual/libc/getpass.html

Their example for rolling your own:

#include <termios.h>
#include <stdio.h>
#include <sys/types.h>

/*
 * Read one line from STREAM with terminal echo switched off.
 *
 * lineptr, n: passed straight through to getline(), which stores the line
 *             in *lineptr and its buffer size in *n.
 * stream:     input stream; its descriptor must refer to a terminal,
 *             otherwise the attribute calls fail.
 *
 * Returns the value getline() returned, or -1 if the terminal attributes
 * could not be read or echo could not be disabled.
 * The line is stored as getline() delivers it; nothing is stripped here.
 */
ssize_t
my_getpass (char **lineptr, size_t *n, FILE *stream)
{
  struct termios saved, noecho;
  ssize_t nread;                /* matches getline()'s return type */
  int fd = fileno (stream);

  /* Turn echoing off and fail if we can't. */
  if (tcgetattr (fd, &saved) != 0)
    return -1;
  noecho = saved;
  noecho.c_lflag &= ~ECHO;
  if (tcsetattr (fd, TCSAFLUSH, &noecho) != 0)
    return -1;

  /* Read the password. */
  nread = getline (lineptr, n, stream);

  /* Restore terminal; best effort, the read result is returned either way. */
  (void) tcsetattr (fd, TCSAFLUSH, &saved);

  return nread;
}

If need be, you could use this as a basis and modify it to display asterisks.

Why is an unsigned int 1 lower than a char y -1?

Asked on Sat, 02 Jul 2011 by Furqan c
5 votes
 main()
{
    unsigned x=1;
    char y=-1;    /* NOTE(review): negative only where plain char is signed -- confirm for the target */

    /* Mixed signed/unsigned comparison: y is converted to unsigned before
       the test, so -1 becomes a very large value and x>y is false. */
    if(x>y)
          printf("x>y");
    else
        printf("x<=y");
}

I expected x>y, but I only got the expected result when I changed unsigned int to signed int.

When a signed and an unsigned operand are used in a single operation, the signed operand is converted to unsigned by C's automatic type conversion. If the bit pattern of -1 is interpreted as an unsigned number, it is a very large value, so x > y is false.

5 votes

In this program I want to increment an IP address, but I see output like this:

125.23.45.67
126.23.45.67
127.23.45.67 
128.23.45.67
129.23.45.67
130.23.45.67
131.23.45.67
132.23.45.67
133.23.45.67
134.23.45.67

But I want to see output like this:

124.23.45.67
124.23.45.68
124.23.45.69
124.23.45.70
124.23.45.71
124.23.45.72
124.23.45.73
124.23.45.74
124.23.45.75
124.23.45.76

Here is program code:

#include <stdlib.h>
#include <stdio.h>
#include <iostream>
using namespace std;
#include "winsock2.h"
#pragma comment(lib,"wsock32.lib")

// Parses two dotted-quad strings with inet_addr(), reports whether each one
// parsed, prints both addresses, then increments adr1 ten times and prints
// it after every step.
void main()
{
in_addr adr1;
in_addr adr2;
int i;

adr1.s_addr=inet_addr("124.23.45.67");
adr2.s_addr=inet_addr("as.34.34.56");   // not a numeric address; exercises the INADDR_NONE branch below
// inet_addr() signals a parse failure by returning INADDR_NONE.
if (adr1.s_addr!=INADDR_NONE)
    cout << " adr1 correct" << endl;
else
    cout << " adr1 incorect " << endl;

if (adr2.s_addr!=INADDR_NONE)
    cout << " adr2 correct" << endl;
else
    cout << " adr2 incorect" << endl;

cout << inet_ntoa(adr1) << endl;
cout << inet_ntoa(adr2) << endl;

for (i=0;i<10;i++)
{
    // Increments the raw s_addr value as stored. In the reported output this
    // changes the FIRST octet (125.23.45.67, 126.23.45.67, ...), not the last.
    // NOTE(review): to step the last octet, convert with ntohl() before
    // adding and htonl() afterwards.
    adr1.s_addr ++;
    cout << inet_ntoa(adr1) << endl;
}
}

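Big endian and little endian gets another one! s_addr is stored in network (big-endian) byte order, so incrementing it directly on a little-endian machine bumps the first octet instead of the last. Use htonl and ntohl to convert back and forth.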

for (i=0;i<10;i++)
{
    // Convert s_addr to host byte order, add one, then convert the result
    // back to network byte order before storing it.
    adr1.s_addr  = htonl(ntohl(adr1.s_addr) + 1);

    cout << inet_ntoa(adr1) << endl;
}

Is this kind of behavior defined by standard?

Asked on Fri, 01 Jul 2011 by Je Rog c
4 votes
#include <unistd.h>
/* Requests 5 bytes from file descriptor 0 into buf, then exits with status 0.
   argc and argv are unused. */
int main(int argc, char* argv[])
{
  char buf[500];
  /* read() never returns more than the 5 bytes requested, so any further
     typed input is left unread. The return value is not checked. */
  read(0, buf, 5);
  return 0;
}

The above reads 5 characters from stdin, but if I input more than 5:

12345morethan5
[root@ test]# morethan5
-bash: morethan5: command not found

The remaining characters will be executed as shell commands.

Is this kind of behavior defined by standard?

Sort of :-)

Your program reads 5 characters, and that's it. Not less, not more. The rest remain in the terminal buffer and get sent to your shell once your C program terminates.

Since you are using read(), which is a raw system call, instead of any of the C stdio buffering alternatives this behaviour is not just expected, but required.

From the POSIX standard on read():

The read() function shall attempt to read nbyte bytes from the file associated with the open file descriptor, fildes, into the buffer pointed to by buf.

...

Upon successful completion, where nbyte is greater than 0, read() shall mark for update the st_atime field of the file, and shall return the number of bytes read. This number shall never be greater than nbyte.

...

Upon successful completion, read() [XSI] [Option Start] and pread() [Option End] shall return a non-negative integer indicating the number of bytes actually read.

I.e. read() should never read more bytes from the file descriptor than requested.

From the related part on terminals:

It is not, however, necessary to read a whole line at once; any number of bytes, even one, may be requested in a read() without losing information.

...

The last process to close a terminal device file shall cause any output to be sent to the device and any input to be discarded.

Note: normally your shell will still have an open file descriptor for the terminal, until you end the session.

In Microchip C18, why does the insertion of a NOP cause much larger code?

4 votes

I have some code in an ISR. The code is given for completeness, the question is only about the commented-out _asm block.

Without the _asm block, this is compiled into 82 instructions. With the _asm block, the result is 107 instructions long. Why the big difference?

Here's the C code:

// Handles the case where the SSPIF flag in PIR1 is set: resets the receive
// buffer, then exchanges bytes through SSPBUF until NOT_SS reads non-zero or
// the receive buffer is full, and finally marks the message as locked and
// received.
if (PIR1bits.SSPIF)
{
    // Start with an empty receive buffer: both cursors back to 0.
    spi_rec_buffer.read_cursor = 0;
    spi_rec_buffer.write_cursor = 0;

    // LATA4 is toggled twice in a row. The commented-out inline-asm NOPs
    // between the two toggles are the line the question is about.
    LATAbits.LATA4 ^= 1;
//      _asm nop nop _endasm
    LATAbits.LATA4 ^= 1;

    // Loop while NOT_SS reads 0 and the receive buffer still has room.
    while (!PORTAbits.NOT_SS && spi_rec_buffer.write_cursor < spi_rec_buffer.size)
    {
        // Store the current SSPBUF value at the receive write position.
        spi_rec_buffer.data[spi_rec_buffer.write_cursor] = SSPBUF;
        // Load SSPBUF from the outgoing buffer at its read position.
        // NOTE(review): this read_cursor is not bounds-checked here.
        SSPBUF = spi_out_msg_buffer.data[spi_out_msg_buffer.read_cursor];
        // Clear the flag so the busy-wait below can detect it being set again.
        PIR1bits.SSPIF = 0;
        spi_rec_buffer.write_cursor++;
        spi_out_msg_buffer.read_cursor++;
        // Outgoing read cursor has reached the write cursor: drive LATA4 low.
        if (spi_out_msg_buffer.read_cursor == spi_out_msg_buffer.write_cursor)
            LATAbits.LATA4 = 0;
        // LATB1 is held high for the duration of the busy-wait.
        LATBbits.LATB1 = 1;
        // Spin until SSPIF is set again or NOT_SS reads non-zero.
        while (!PORTAbits.NOT_SS && !PIR1bits.SSPIF);
        LATBbits.LATB1 = 0;
    }

    // Both status flags are set unconditionally once the loop has ended.
    spi_message_locked = true;
    spi_message_received = true;

}

Without NOPs:

BTFSS     0x9e,0x3,0x0      if (PIR1bits.SSPIF)
BRA       0x2ba
                            {
MOVLB     0xf                   spi_rec_buffer.read_cursor = 0;
CLRF      0x4,0x1
CLRF      0x5,0x1
CLRF      0x6,0x1               spi_rec_buffer.write_cursor = 0;
CLRF      0x7,0x1
BTG       0x89,0x4,0x0          LATAbits.LATA4 ^= 1;
BTG       0x89,0x4,0x0          LATAbits.LATA4 ^= 1;
MOVF      0x80,0x0,0x0          while (!PORTAbits.NOT_SS && spi_rec_buffer.write_cursor < spi_rec_buffer.size)
ANDLW     0x20
BNZ       0x2b0
MOVLB     0xf
MOVF      0x7,0x0,0x1
XORWF     0x3,0x0,0x1
BTFSS     0xe8,0x7,0x0
BRA       0x254
RLCF      0x3,0x0,0x1
BRA       0x25c
MOVF      0x2,0x0,0x1
SUBWF     0x6,0x0,0x1
MOVF      0x3,0x0,0x1
SUBWFB    0x7,0x0,0x1
BC        0x2b0
BRA       0x240
                                {
MOVF      0x0,0x0,0x1               spi_rec_buffer.data[spi_rec_buffer.write_cursor] = SSPBUF;
ADDWF     0x6,0x0,0x1
MOVWF     0xe9,0x0
MOVF      0x1,0x0,0x1
ADDWFC    0x7,0x0,0x1
MOVWF     0xea,0x0
MOVFF     0xfc9,0xfef
MOVLB     0xf                       SSPBUF = spi_out_msg_buffer.data[spi_out_msg_buffer.read_cursor];
MOVF      0x10,0x0,0x1
ADDWF     0x14,0x0,0x1
MOVWF     0xe9,0x0
MOVF      0x11,0x0,0x1
ADDWFC    0x15,0x0,0x1
MOVWF     0xea,0x0
MOVF      0xef,0x0,0x0
MOVWF     0xc9,0x0
BCF       0x9e,0x3,0x0              PIR1bits.SSPIF = 0;
MOVLB     0xf                       spi_rec_buffer.write_cursor++;
INCF      0x6,0x1,0x1
MOVLW     0x0
ADDWFC    0x7,0x1,0x1
MOVLB     0xf                       spi_out_msg_buffer.read_cursor++;
INCF      0x14,0x1,0x1
ADDWFC    0x15,0x1,0x1
MOVF      0x16,0x0,0x1              if (spi_out_msg_buffer.read_cursor == spi_out_msg_buffer.write_cursor)
XORWF     0x14,0x0,0x1
BNZ       0x29e
MOVF      0x17,0x0,0x1
XORWF     0x15,0x0,0x1
BNZ       0x29e
BCF       0x89,0x4,0x0                  LATAbits.LATA4 = 0;
BSF       0x8a,0x1,0x0              LATBbits.LATB1 = 1;
MOVF      0x80,0x0,0x0              while (!PORTAbits.NOT_SS && !PIR1bits.SSPIF);
ANDLW     0x20
BNZ       0x2ac
MOVF      0x9e,0x0,0x0
ANDLW     0x8
BZ        0x2a0
BCF       0x8a,0x1,0x0              LATBbits.LATB1 = 0;
                                }
MOVLB     0xf                   spi_message_locked = true;
MOVLW     0x1
MOVWF     0x18,0x1
MOVLB     0xf                   spi_message_received = true;
MOVWF     0x19,0x1
                            }
MOVLW     0x4            }
SUBWF     0xe1,0x0,0x0
BC        0x2c4
CLRF      0xe1,0x0
MOVF      0xe5,0x1,0x0
MOVWF     0xe1,0x0
MOVF      0xe5,0x1,0x0
MOVFF     0xfe7,0xfd9
MOVF      0xe5,0x1,0x0
MOVFF     0xfe5,0xfea
MOVFF     0xfe5,0xfe9
MOVFF     0xfe5,0xfda
RETFIE    0x1

With NOPs:

BTFSS     0x9e,0x3,0x0      if (PIR1bits.SSPIF)
BRA       0x30e
                            {
MOVLB     0xf                   spi_rec_buffer.read_cursor = 0;
CLRF      0x4,0x1
CLRF      0x5,0x1
MOVLB     0xf                   spi_rec_buffer.write_cursor = 0;
CLRF      0x6,0x1
CLRF      0x7,0x1
BTG       0x89,0x4,0x0          LATAbits.LATA4 ^= 1;
NOP                             _asm nop nop _endasm
NOP
BTG       0x89,0x4,0x0          LATAbits.LATA4 ^= 1;
MOVF      0x80,0x0,0x0          while (!PORTAbits.NOT_SS && spi_rec_buffer.write_cursor < spi_rec_buffer.size)
ANDLW     0x20
BNZ       0x302
MOVLB     0xf
MOVF      0x7,0x0,0x1
MOVLB     0xf
XORWF     0x3,0x0,0x1
BTFSS     0xe8,0x7,0x0
BRA       0x27e
RLCF      0x3,0x0,0x1
BRA       0x28c
MOVF      0x2,0x0,0x1
MOVLB     0xf
SUBWF     0x6,0x0,0x1
MOVLB     0xf
MOVF      0x3,0x0,0x1
MOVLB     0xf
SUBWFB    0x7,0x0,0x1
BC        0x302
BRA       0x268
                                {
MOVLB     0xf                       spi_rec_buffer.data[spi_rec_buffer.write_cursor] = SSPBUF;
MOVLB     0xf
MOVF      0x0,0x0,0x1
MOVLB     0xf
ADDWF     0x6,0x0,0x1
MOVWF     0xe9,0x0
MOVLB     0xf
MOVLB     0xf
MOVF      0x1,0x0,0x1
MOVLB     0xf
ADDWFC    0x7,0x0,0x1
MOVWF     0xea,0x0
MOVFF     0xfc9,0xfef
MOVLB     0xf                       SSPBUF = spi_out_msg_buffer.data[spi_out_msg_buffer.read_cursor];
MOVLB     0xf
MOVF      0x10,0x0,0x1
MOVLB     0xf
ADDWF     0x14,0x0,0x1
MOVWF     0xe9,0x0
MOVLB     0xf
MOVLB     0xf
MOVF      0x11,0x0,0x1
MOVLB     0xf
ADDWFC    0x15,0x0,0x1
MOVWF     0xea,0x0
MOVF      0xef,0x0,0x0
MOVWF     0xc9,0x0
BCF       0x9e,0x3,0x0              PIR1bits.SSPIF = 0;                           // Interruptflag löschen...
MOVLB     0xf                       spi_rec_buffer.write_cursor++;
INCF      0x6,0x1,0x1
MOVLW     0x0
ADDWFC    0x7,0x1,0x1
MOVLB     0xf                       spi_out_msg_buffer.read_cursor++;
INCF      0x14,0x1,0x1
MOVLW     0x0
ADDWFC    0x15,0x1,0x1
MOVLB     0xf                       if (spi_out_msg_buffer.read_cursor == spi_out_msg_buffer.write_cursor)
MOVF      0x16,0x0,0x1
MOVLB     0xf
XORWF     0x14,0x0,0x1
BNZ       0x2ea
MOVLB     0xf
MOVF      0x17,0x0,0x1
MOVLB     0xf
XORWF     0x15,0x0,0x1
BNZ       0x2ee
BCF       0x89,0x4,0x0                  LATAbits.LATA4 = 0;
BSF       0x8a,0x1,0x0              LATBbits.LATB1 = 1;
MOVF      0x80,0x0,0x0              while (!PORTAbits.NOT_SS && !PIR1bits.SSPIF);
ANDLW     0x20
BNZ       0x2fe
MOVF      0x9e,0x0,0x0
ANDLW     0x8
BNZ       0x2fe
BRA       0x2f0
BCF       0x8a,0x1,0x0              LATBbits.LATB1 = 0;
                                }
MOVLB     0xf                   spi_message_locked = true;
MOVLW     0x1
MOVWF     0x18,0x1
MOVLB     0xf                   spi_message_received = true;
MOVLW     0x1
MOVWF     0x19,0x1
                            }
MOVLW     0x4            }
SUBWF     0xe1,0x0,0x0
BC        0x318
CLRF      0xe1,0x0
MOVF      0xe5,0x1,0x0
MOVWF     0xe1,0x0
MOVF      0xe5,0x1,0x0
MOVFF     0xfe7,0xfd9
MOVF      0xe5,0x1,0x0
MOVFF     0xfe5,0xfea
MOVFF     0xfe5,0xfe9
MOVFF     0xfe5,0xfda
RETFIE    0x1

Here's a screenshot of a partial diff (click to enlarge): Diff

So that people don't have to guess, here's a statement from the Microchip C18 manual (emphasis added):

It is generally recommended to limit the use of inline assembly to a minimum. Any functions containing inline assembly will not be optimized by the compiler. To write large fragments of assembly code, use the MPASM assembler and link the modules to the C modules using the MPLINK linker.

I think that this is a common situation with inline asm. GCC is an exception - it will optimize the inline assembly along with the surrounding C code; in order to do this correctly, GCC's inline assembly is quite complex (you have to let it know which registers and memory are clobbered).