Changeset 1099 for sfio

Show
Ignore:
Timestamp:
08/26/06 13:55:35 (2 years ago)
Author:
rossb
Message:

sfio: implemented lock-free lifo in IA32 asm, added licence

Location:
sfio/trunk/src/lockfree
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • sfio/trunk/src/lockfree/grame02_lifo.c

    r1098 r1099  
     1/* 
     2 * Lock free data structures library 
     3 * Multiple-reader, multiple-writer lock-free LIFO stack 
     4 * Copyright (c) 2006 Ross Bencina <rossb@audiomulch.com> 
     5 * 
     6 * Permission is hereby granted, free of charge, to any person obtaining 
     7 * a copy of this software and associated documentation files 
     8 * (the "Software"), to deal in the Software without restriction, 
     9 * including without limitation the rights to use, copy, modify, merge, 
     10 * publish, distribute, sublicense, and/or sell copies of the Software, 
     11 * and to permit persons to whom the Software is furnished to do so, 
     12 * subject to the following conditions: 
     13 * 
     14 * The above copyright notice and this permission notice shall be 
     15 * included in all copies or substantial portions of the Software. 
     16 * 
     17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
     18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
     19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
     20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 
     21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
     22 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
     23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
     24 */ 
     25 
     26 /* 
     27 * The text above constitutes the entire Lock Free Data Structures Library 
     28 * license; however, the community also makes the following non-binding 
     29 * requests: 
     30 * 
     31 * Any person wishing to distribute modifications to the Software is 
     32 * requested to send the modifications to the original developer so that 
     33 * they can be incorporated into the canonical version. It is also  
     34 * requested that these non-binding requests be included along with the  
     35 * license above. 
     36 */ 
     37  
    138#include "grame02_lifo.h" 
    239 
     
    542 
    643/* 
    7 fixme: could rename lifo 
     44    Note that the pseudocode in the comments is quoted from the article 
     45    referenced in the header file; however, the variable names have been 
     46    changed to be consistent with those used in the C implementation. 
    847*/ 
    948 
     49/* Set one of the following to 1 to enable the desired implementation */ 
     50 
     51#define GRAME02_LIFO_C_IMPLEMENTATION                   0 
     52#define GRAME02_LIFO_MSVC_IA32_INLINE_IMPLEMENTATION    1 
     53#define GRAME02_LIFO_GCC_IA32_INLINE_IMPLEMENTATION     0 
     54 
     55 
     56#if GRAME02_LIFO_C_IMPLEMENTATION 
    1057 
    1158static int CAS32( volatile void *p32, volatile void *oldValue, volatile void *newValue ) 
     
    55102 
    56103 
    57 LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_initialize( grame02_lifo_t* stack ) 
    58 { 
    59     stack->top = NULL; 
    60 } 
    61  
    62  
    63 LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_push( grame02_lifo_t* stack, grame02_lifo_node_t *node ) 
     104LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_initialize( grame02_lifo_t* lifo ) 
     105{ 
     106    lifo->top = NULL; 
     107} 
     108 
     109 
     110LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_push( grame02_lifo_t* lifo, grame02_lifo_node_t *node ) 
    64111{ 
    65112/* B1: loop                                                                   */ 
    66113    for(;;){ 
    67 /* B2:  cl->next = lf->top # set the cell next pointer to top of the lifo    */ 
    68  
    69         /* On IA32 the CAS32 below ensures the visibility of stack->top to all 
    70          * other threads. Otherwise we may need a read barrier here to ensure 
    71          * that the current value of stack->top is being fetched. 
    72          * ==> actually not 100% this is true if CAS32 fails, should check 
     114/* B2:  node->next = lifo->top # set the cell next pointer to top of the lifo */ 
     115 
     116        /* On IA32 the CAS32 below, and the CAS64 in grame02_lifo_pop ensures 
     117         * the visibility of stack->top to all other threads. Otherwise we may 
     118         * need a read barrier here to ensure that the current value of 
     119         * stack->top is being fetched. 
    73120         */ 
    74121 
    75         node->next = stack->top; 
     122        node->next = lifo->top; 
    76123         
    77 /* B3:  if CAS (&lf->top, cl->next, cl) # try to set the top of 
     124/* B3:  if CAS (&lifo->top, node->next, node ) # try to set the top of 
    78125                                        #                    the lifo to cell */ 
    79126/* B4:   break                                                                */ 
     
    85132         */ 
    86133          
    87         if( CAS32( &stack->top, node->next, node ) ) 
     134        if( CAS32( &lifo->top, node->next, node ) ) 
    88135            break; 
    89136/* B6: endloop                                                                */ 
     
    92139 
    93140 
    94 LIBLFDS_CALLING_CONVENTION(grame02_lifo_node_t*) grame02_lifo_pop( grame02_lifo_t* stack ) 
     141LIBLFDS_CALLING_CONVENTION(grame02_lifo_node_t*) grame02_lifo_pop( grame02_lifo_t* lifo ) 
    95142{ 
    96143    grame02_lifo_node_t *top, *next; 
     
    105152         */ 
    106153 
    107 /* SC2:     head = lf->top # get the top cell of the lifo                     */ 
    108 /* SC2:     oc = lf->ocount # get the pop operations count                    */ 
    109         top = (grame02_lifo_node_t*)stack->top; 
    110         pop_count = stack->pop_count; 
    111  
    112 /* SC3:     if head == NULL                                                   */ 
     154/* SC2:     top = lifo->top # get the top node of the lifo                    */ 
     155/* SC2:     pc = lifo->pop_count # get the pop operations count               */ 
     156        top = (grame02_lifo_node_t*)lifo->top; 
     157        pop_count = lifo->pop_count; 
     158 
     159/* SC3:     if top == NULL                                                    */ 
    113160/* SC4:         return NULL # LIFO is empty                                   */ 
    114161/* SC5:     endif                                                             */ 
     
    120167         */ 
    121168 
    122 /* SC6:     next = head->next # get the next cell of cell                     */ 
     169/* SC6:     next = top->next # get the next node of the top node              */ 
    123170        next = (grame02_lifo_node_t*)top->next; 
    124171 
    125 /* SC7:     if CAS2 (&lf->top, head, oc, next, oc + 1) # try to change both 
    126                                                        # the top of the lifo 
    127                                                        # and pop count        */ 
     172/* SC7:     if CAS2 (&lifo->top, top, pc, next, pc + 1) # try to change both 
     173                                                        # the top of the lifo 
     174                                                        # and pop count       */ 
    128175/* SC8:         break                                                         */ 
    129176/* SC9:     endif 
    130177                                                      */ 
    131         if( CAS64( stack, top, pop_count, next, pop_count + 1 ) ) 
     178        if( CAS64( lifo, top, pop_count, next, pop_count + 1 ) ) 
    132179            break; 
    133180 
    134181/* SC10: endloop                                                              */ 
    135182    } 
    136 /* SC11: return head                                                          */ 
     183/* SC11: return top                                                           */ 
    137184    return top; 
    138185} 
     186 
     187#endif /* GRAME02_LIFO_C_IMPLEMENTATION */ 
     188 
     189 
     190#if GRAME02_LIFO_MSVC_IA32_INLINE_IMPLEMENTATION 
     191 
     192/* 
     193    STDCALL passes arguments right-to-left, and return the value in eax. 
     194    esi, edi, ebx and ebp need to be saved on the stack 
     195    which means that eax, ecx and edx can be trashed by the function 
     196    The called function cleans the stack. 
     197 
     198    FASTCALL passes the first two arguments in ECX and EDX. The asm versions 
     199    have been written to load the parameters into ECX, EDX so it would be 
     200    really easy to change to fastcall if there was going to be a benefit 
     201 
     202    The IA32 compare-exchange functions have some constraints about which 
     203    registers can be used as parameters. We use this to determine our register 
     204    allocation choices. 
     205 
     206    CMPXCHG r/m32, r32 
     207        r/m32   -- 32 bit memory to update 
     208        eax     -- old value 
     209        r32     -- new value 
     210 
     211        Compare EAX with r/m32. If equal, ZF is set and r32 is loaded into r/m32. 
     212        Else, clear ZF and load r/m32 into EAX 
     213 
     214    CMPXCHG8B m64 
     215        m64     -- 64 bit memory to update 
     216        EDX:EAX -- old value 
     217        ECX:EBX -- new value 
     218 
     219        Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into m64. 
     220        Else, clear ZF and load m64 into EDX:EAX. 
     221 
     222    In general we avoid creating a proper stack frame as much as possible 
     223    to try to keep things efficient. 
     224*/ 
     225 
     226 
     227__declspec(naked) 
     228LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_initialize( grame02_lifo_t* lifo ) 
     229{ 
     230    (void) lifo; 
     231     
     232    __asm{ 
     233        mov     ecx, [esp + 4] 
     234        mov     [ecx], 0        // lifo->top = NULL; 
     235        ret     4 
     236    } 
     237} 
     238 
     239 
     240__declspec(naked) 
     241LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_push( grame02_lifo_t* lifo, grame02_lifo_node_t *node ) 
     242{ 
     243    (void) lifo; 
     244    (void) node; 
     245     
     246    // this function is declared __declspec(naked) so we can handle setting up 
     247    // the stack frame ourselves. 
     248    // we don't clobber any callee save registers so we don't need to save any. 
     249    // we don't make any calls or allocate any locals so we don't need to set 
     250    // up the ebp frame pointer, we can reference our parameters directly off esp 
     251 
     252    // [esp]                // return address 
     253    // dword ptr [esp+4]    // lifo 
     254    // dword ptr [esp+8]    // node 
     255 
     256    __asm{ 
     257        mov     ecx, [esp+4]    // lifo ptr into ecx, this is also the address of lifo->top 
     258        mov     edx, [esp+8]    // node ptr into edx, this is also the address of node->next 
     259        mov     eax, [ecx]      // value of lifo->top into eax 
     260         
     261/* B1: loop                                                                   */ 
     262push_loop: 
     263 
     264/* B2:  node->next = lifo->top # set the cell next pointer to top of the lifo */ 
     265 
     266        mov     [edx], eax  // lifo->top into node->next 
     267 
     268/* B3:  if CAS (&lifo->top, node->next, node ) # try to set the top of 
     269                                        #                    the lifo to cell */ 
     270/* B4:   break                                                                */ 
     271/* B5:  endif                                                                 */ 
     272 
     273        // ecx contains ptr to lifo (a.k.a. &lifo->top) 
     274        // eax contains the previously read value of lifo->top (a.k.a. node->next) 
     275        // edx contains node 
     276        lock cmpxchg [ecx], edx     // sets ZF on success, copies [esi] to eax on failure 
     277                                    // therefore on failure eax contains new lifo->top 
     278        jnz     push_loop 
     279/* B6: endloop                                                                */ 
     280 
     281        ret     8 
     282    } 
     283} 
     284 
     285 
     286__declspec(naked) 
     287LIBLFDS_CALLING_CONVENTION(grame02_lifo_node_t*) grame02_lifo_pop( grame02_lifo_t* lifo ) 
     288{ 
     289    (void) lifo; 
     290     
     291    // we use the same registers which are forced by CMPXCHG8B, with lifo in esi 
     292    // eax <= top 
     293    // edx <= pop_count 
     294    // ebx <= next 
     295    // ecx <= pop_count + 1 
     296 
     297    // result goes in eax 
     298 
     299    __asm{ 
     300 
     301    // this version implements a fastpath for empty lifos: we check 
     302    // whether lifo->top is NULL prior to saving callee-save registers on the 
     303    // stack.  
     304 
     305    // the following code costs us 1 reg-reg move compared to a simpler version 
     306    // with no fastpath. it can process an empty lifo in 5 instead of 10 
     307    // instructions. in summary, it unrolls the first retry loop and hoists the 
     308    // first empty-lifo test outside the register saving block. 
     309     
     310        mov     ecx, [esp+4]    // lifo ptr into edx (a caller save register) 
     311 
     312/* SC2:     top = lifo->top # get the top node of the lifo                    */ 
     313        mov     eax, [ecx]      // lifo->top into eax 
     314 
     315/* SC3:     if top == NULL                                                    */ 
     316/* SC4:         return NULL # LIFO is empty                                   */ 
     317/* SC5:     endif                                                             */ 
     318        cmp     eax, 0          // if lifo->top is NULL, return NULL 
     319        jnz     pop_pre_loop 
     320        ret     4 
     321 
     322pop_pre_loop: 
     323        push    ebx             // save callee-save registers on stack 
     324        push    esi             // save callee-save registers on stack 
     325        mov     esi, ecx        // lifo ptr into esi 
     326         
     327/* SC2:     pop_count = lifo->pop_count # get the pop operations count        */ 
     328        mov     edx, [esi + 4]  // lifo->pop_count into edx 
     329 
     330/* SC6:     next = top->next # get the next node of top                       */ 
     331 
     332        mov     ebx, [eax]      // put top->next into ebx 
     333        mov     ecx, edx 
     334        inc     ecx             // pop_count + 1 into ecx 
     335 
     336/* SC7:     if CAS2 (&lifo->top, top, pc, next, pc + 1)  # try to change both 
     337                                                         # the top of the lifo 
     338                                                         # and pop count      */ 
     339                                                          
     340        lock cmpxchg8b [esi]    // on failure loads (eax, edx) from [esi] 
     341                                // ready for next iteration otherwise eax still 
     342                                // contains top ready to return 
     343        jz     pop_return 
     344 
     345/* SC1: loop                                                                  */ 
     346pop_loop: 
     347 
     348/* SC3:     if top == NULL                                                    */ 
     349/* SC4:         return NULL # LIFO is empty                                   */ 
     350/* SC5:     endif                                                             */ 
     351 
     352        cmp     eax, 0          // test for NULL top value 
     353        jz     pop_return       // eax is null so our return value is already set 
     354 
     355/* SC6:     next = top->next # get the next node of top                       */ 
     356 
     357        mov     ebx, [eax]      // put top->next into ebx 
     358        mov     ecx, edx 
     359        inc     ecx             // pop_count + 1 into ecx 
     360 
     361/* SC7:     if CAS2 (&lifo->top, top, pc, next, pc + 1)  # try to change both 
     362                                                         # the top of the lifo 
     363                                                         # and pop count      */ 
     364                                                          
     365        lock cmpxchg8b [esi]    // on failure loads (eax, edx) from [esi] 
     366                                // ready for next iteration otherwise eax still 
     367                                // contains top ready to return 
     368        jnz     pop_loop 
     369/* SC8:         break                                                         */ 
     370/* SC9:     endif 
     371/* SC10: endloop                                                              */ 
     372/* SC11: return top                                                           */ 
     373 
     374pop_return: 
     375        pop     esi 
     376        pop     ebx 
     377        ret     4               // return old top (still in eax) 
     378    } 
     379 
     380    /* ignore "Function should return a value" compiler warning               */ 
     381} 
     382 
     383#endif /* GRAME02_LIFO_C_IMPLEMENTATION */ 
     384 
     385#if GRAME02_LIFO_GCC_IA32_INLINE_IMPLEMENTATION 
     386 
     387// would be nice if someone who knows IA32 assembler could review the above 
     388// for any obvious optimisations/reorderings before we convert to GCC format 
     389 
     390#endif /* GRAME02_LIFO_GCC_IA32_INLINE_IMPLEMENTATION */ 
    139391 
    140392 
  • sfio/trunk/src/lockfree/grame02_lifo.h

    r1098 r1099  
     1/* 
     2 * Lock free data structures library 
     3 * Multiple-reader, multiple-writer lock-free LIFO stack 
     4 * Copyright (c) 2006 Ross Bencina <rossb@audiomulch.com> 
     5 * 
     6 * Permission is hereby granted, free of charge, to any person obtaining 
     7 * a copy of this software and associated documentation files 
     8 * (the "Software"), to deal in the Software without restriction, 
     9 * including without limitation the rights to use, copy, modify, merge, 
     10 * publish, distribute, sublicense, and/or sell copies of the Software, 
     11 * and to permit persons to whom the Software is furnished to do so, 
     12 * subject to the following conditions: 
     13 * 
     14 * The above copyright notice and this permission notice shall be 
     15 * included in all copies or substantial portions of the Software. 
     16 * 
     17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
     18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
     19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
     20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 
     21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
     22 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
     23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
     24 */ 
     25 
     26 /* 
     27 * The text above constitutes the entire Lock Free Data Structures Library 
     28 * license; however, the community also makes the following non-binding 
     29 * requests: 
     30 * 
     31 * Any person wishing to distribute modifications to the Software is 
     32 * requested to send the modifications to the original developer so that 
     33 * they can be incorporated into the canonical version. It is also  
     34 * requested that these non-binding requests be included along with the  
     35 * license above. 
     36 */ 
     37  
    138#ifndef INCLUDED_GRAME02_LIFO_H 
    239#define INCLUDED_GRAME02_LIFO_H 
    340 
    441/* 
    5     multiple-reader, multiple-writer lock-free LIFO stack 
    6  
     42    see: 
     43     
    744    D. Fober, S. Letz, Y. Orlarey 
    845    "Lock-Free Techniques for Concurrent Access to Shared Objects," 
     
    2764typedef struct grame02_lifo_node_t grame02_lifo_node_t; 
    2865 
    29 #pragma pack( push, 4 ) 
     66#pragma pack( push, 4 )     /* our assembler versions assume 4 byte alignment */ 
    3067 
    3168 
    3269typedef struct grame02_lifo_node_t{ 
     70    volatile grame02_lifo_node_t *next; 
    3371    grame02_lifo_value_t value; 
    34     volatile grame02_lifo_node_t *next; 
    3572} grame02_lifo_node_t; 
    3673 
     
    4582 
    4683 
    47 LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_initialize( grame02_lifo_t* stack ); 
     84LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_initialize( grame02_lifo_t* lifo ); 
    4885 
    49 LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_push( grame02_lifo_t* stack, grame02_lifo_node_t *node ); 
     86LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_push( grame02_lifo_t* lifo, grame02_lifo_node_t *node ); 
    5087 
    51 LIBLFDS_CALLING_CONVENTION(grame02_lifo_node_t*) grame02_lifo_pop( grame02_lifo_t* stack ); 
     88LIBLFDS_CALLING_CONVENTION(grame02_lifo_node_t*) grame02_lifo_pop( grame02_lifo_t* lifo ); 
    5289 
    5390LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_test(void);