| | 1 | /* |
| | 2 | * Lock free data structures library |
| | 3 | * Multiple-reader, multiple-writer lock-free LIFO stack |
| | 4 | * Copyright (c) 2006 Ross Bencina <rossb@audiomulch.com> |
| | 5 | * |
| | 6 | * Permission is hereby granted, free of charge, to any person obtaining |
| | 7 | * a copy of this software and associated documentation files |
| | 8 | * (the "Software"), to deal in the Software without restriction, |
| | 9 | * including without limitation the rights to use, copy, modify, merge, |
| | 10 | * publish, distribute, sublicense, and/or sell copies of the Software, |
| | 11 | * and to permit persons to whom the Software is furnished to do so, |
| | 12 | * subject to the following conditions: |
| | 13 | * |
| | 14 | * The above copyright notice and this permission notice shall be |
| | 15 | * included in all copies or substantial portions of the Software. |
| | 16 | * |
| | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| | 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| | 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| | 20 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR |
| | 21 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF |
| | 22 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| | 23 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| | 24 | */ |
| | 25 | |
| | 26 | /* |
| | 27 | * The text above constitutes the entire Lock Free Data Structures Library |
| | 28 | * license; however, the community also makes the following non-binding |
| | 29 | * requests: |
| | 30 | * |
| | 31 | * Any person wishing to distribute modifications to the Software is |
| | 32 | * requested to send the modifications to the original developer so that |
| | 33 | * they can be incorporated into the canonical version. It is also |
| | 34 | * requested that these non-binding requests be included along with the |
| | 35 | * license above. |
| | 36 | */ |
| | 37 | |
| | 186 | |
| | 187 | #endif /* GRAME02_LIFO_C_IMPLEMENTATION */ |
| | 188 | |
| | 189 | |
| | 190 | #if GRAME02_LIFO_MSVC_IA32_INLINE_IMPLEMENTATION |
| | 191 | |
| | 192 | /* |
| | 193 | STDCALL passes arguments right-to-left, and return the value in eax. |
| | 194 | esi, edi, ebx and ebp need to be saved on the stack |
| | 195 | which means that eax, ecx and edx can be trashed by the function |
| | 196 | The called function cleans the stack. |
| | 197 | |
| | 198 | FASTCALL passes the first two arguments in ECX and EDX. The asm versions |
| | 199 | have been written to load the parameters into ECX, EDX so it would be |
| | 200 | really easy to change to fastcall if there was going to be a benefit |
| | 201 | |
| | 202 | The IA32 compare-exchange functions have some constraints about which |
| | 203 | registers can be used as parameters. We use this to determine our register |
| | 204 | allocation choices. |
| | 205 | |
| | 206 | CMPXCHG r/m32, r32 |
| | 207 | r/m32 -- 32 bit memory to update |
| | 208 | eax -- old value |
| | 209 | r32 -- new value |
| | 210 | |
| | 211 | Compare EAX with r/m32. If equal, ZF is set and r32 is loaded into r/m32. |
| | 212 | Else, clear ZF and load r/m32 into EAX |
| | 213 | |
| | 214 | CMPXCHG8B m64 |
| | 215 | m64 -- 64 bit memory to update |
| | 216 | EDX:EAX -- old value |
| | 217 | ECX:EBX -- new value |
| | 218 | |
| | 219 | Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into m64. |
| | 220 | Else, clear ZF and load m64 into EDX:EAX. |
| | 221 | |
| | 222 | In general we avoid creating a proper stack frame as much as possible |
| | 223 | to try to keep things efficient. |
| | 224 | */ |
| | 225 | |
| | 226 | |
| | 227 | __declspec(naked) |
| | 228 | LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_initialize( grame02_lifo_t* lifo ) |
| | 229 | { |
| | 230 | (void) lifo; |
| | 231 | |
| | 232 | __asm{ |
| | 233 | mov ecx, [esp + 4] |
| | 234 | mov [ecx], 0 // lifo->top = NULL; |
| | 235 | ret 4 |
| | 236 | } |
| | 237 | } |
| | 238 | |
| | 239 | |
| | 240 | __declspec(naked) |
| | 241 | LIBLFDS_CALLING_CONVENTION(void) grame02_lifo_push( grame02_lifo_t* lifo, grame02_lifo_node_t *node ) |
| | 242 | { |
| | 243 | (void) lifo; |
| | 244 | (void) node; |
| | 245 | |
| | 246 | // this function is declared __declspec(naked) so we can handle setting up |
| | 247 | // the stack frame ourselves. |
| | 248 | // we don't clobber any callee save registers so we don't need to save any. |
| | 249 | // we don't make any calls or allocate any locals so we don't need to set |
| | 250 | // up the ebp frame pointer, we can reference our parameters directly off esp |
| | 251 | |
| | 252 | // [esp] // return address |
| | 253 | // dword ptr [esp+4] // lifo |
| | 254 | // dword ptr [esp+8] // node |
| | 255 | |
| | 256 | __asm{ |
| | 257 | mov ecx, [esp+4] // lifo ptr into ecx, this is also the address of lifo->top |
| | 258 | mov edx, [esp+8] // node ptr into edx, this is also the address of node->next |
| | 259 | mov eax, [ecx] // value of lifo->top into eax |
| | 260 | |
| | 261 | /* B1: loop */ |
| | 262 | push_loop: |
| | 263 | |
| | 264 | /* B2: node->next = lifo->top # set the cell next pointer to top of the lifo */ |
| | 265 | |
| | 266 | mov [edx], eax // lifo->top into node->next |
| | 267 | |
| | 268 | /* B3: if CAS (&lifo->top, node->next, node ) # try to set the top of |
| | 269 | # the lifo to cell */ |
| | 270 | /* B4: break */ |
| | 271 | /* B5: endif */ |
| | 272 | |
| | 273 | // ecx contains ptr to lifo (a.k.a. &lifo->top) |
| | 274 | // eax contains the previously read value of lifo->top (a.k.a. node->next) |
| | 275 | // edx contains node |
| | 276 | lock cmpxchg [ecx], edx // sets ZF on success, copies [esi] to eax on failure |
| | 277 | // therefore on failure eax contains new lifo->top |
| | 278 | jnz push_loop |
| | 279 | /* B6: endloop */ |
| | 280 | |
| | 281 | ret 8 |
| | 282 | } |
| | 283 | } |
| | 284 | |
| | 285 | |
| | 286 | __declspec(naked) |
| | 287 | LIBLFDS_CALLING_CONVENTION(grame02_lifo_node_t*) grame02_lifo_pop( grame02_lifo_t* lifo ) |
| | 288 | { |
| | 289 | (void) lifo; |
| | 290 | |
| | 291 | // we use the same registers which are forced by CMPXCHG8B, with lifo in esi |
| | 292 | // eax <= top |
| | 293 | // edx <= pop_count |
| | 294 | // ebx <= next |
| | 295 | // ecx <= pop_count + 1 |
| | 296 | |
| | 297 | // result goes in eax |
| | 298 | |
| | 299 | __asm{ |
| | 300 | |
| | 301 | // this version implements a fastpath for empty lifos: we check |
| | 302 | // whether lifo->top is NULL prior to saving callee-save registers on the |
| | 303 | // stack. |
| | 304 | |
| | 305 | // the following code costs us 1 reg-reg move compared to a simpler version |
| | 306 | // with no fastpath. it can process an empty lifo in 5 instead of 10 |
| | 307 | // instructions. in summary, it unrolls the first retry loop and hoists the |
| | 308 | // first empty-lifo test outside the register saving block. |
| | 309 | |
| | 310 | mov ecx, [esp+4] // lifo ptr into edx (a caller save register) |
| | 311 | |
| | 312 | /* SC2: top = lifo->top # get the top node of the lifo */ |
| | 313 | mov eax, [ecx] // lifo->top into eax |
| | 314 | |
| | 315 | /* SC3: if top == NULL */ |
| | 316 | /* SC4: return NULL # LIFO is empty */ |
| | 317 | /* SC5: endif */ |
| | 318 | cmp eax, 0 // if lifo->top is NULL, return NULL |
| | 319 | jnz pop_pre_loop |
| | 320 | ret 4 |
| | 321 | |
| | 322 | pop_pre_loop: |
| | 323 | push ebx // save callee-save registers on stack |
| | 324 | push esi // save callee-save registers on stack |
| | 325 | mov esi, ecx // lifo ptr into esi |
| | 326 | |
| | 327 | /* SC2: pop_count = lifo->pop_count # get the pop operations count */ |
| | 328 | mov edx, [esi + 4] // lifo->pop_count into edx |
| | 329 | |
| | 330 | /* SC6: next = top->next # get the next node of top */ |
| | 331 | |
| | 332 | mov ebx, [eax] // put top->next into ebx |
| | 333 | mov ecx, edx |
| | 334 | inc ecx // pop_count + 1 into ecx |
| | 335 | |
| | 336 | /* SC7: if CAS2 (&lifo->top, top, pc, next, pc + 1) # try to change both |
| | 337 | # the top of the lifo |
| | 338 | # and pop count */ |
| | 339 | |
| | 340 | lock cmpxchg8b [esi] // on failure loads (eax, edx) from [esi] |
| | 341 | // ready for next iteration otherwise eax still |
| | 342 | // contains top ready to return |
| | 343 | jz pop_return |
| | 344 | |
| | 345 | /* SC1: loop */ |
| | 346 | pop_loop: |
| | 347 | |
| | 348 | /* SC3: if top == NULL */ |
| | 349 | /* SC4: return NULL # LIFO is empty */ |
| | 350 | /* SC5: endif */ |
| | 351 | |
| | 352 | cmp eax, 0 // test for NULL top value |
| | 353 | jz pop_return // eax is null so our return value is already set |
| | 354 | |
| | 355 | /* SC6: next = top->next # get the next node of top */ |
| | 356 | |
| | 357 | mov ebx, [eax] // put top->next into ebx |
| | 358 | mov ecx, edx |
| | 359 | inc ecx // pop_count + 1 into ecx |
| | 360 | |
| | 361 | /* SC7: if CAS2 (&lifo->top, top, pc, next, pc + 1) # try to change both |
| | 362 | # the top of the lifo |
| | 363 | # and pop count */ |
| | 364 | |
| | 365 | lock cmpxchg8b [esi] // on failure loads (eax, edx) from [esi] |
| | 366 | // ready for next iteration otherwise eax still |
| | 367 | // contains top ready to return |
| | 368 | jnz pop_loop |
| | 369 | /* SC8: break */ |
| | 370 | /* SC9: endif |
| | 371 | /* SC10: endloop */ |
| | 372 | /* SC11: return top */ |
| | 373 | |
| | 374 | pop_return: |
| | 375 | pop esi |
| | 376 | pop ebx |
| | 377 | ret 4 // return old top (still in eax) |
| | 378 | } |
| | 379 | |
| | 380 | /* ignore "Function should return a value" compiler warning */ |
| | 381 | } |
| | 382 | |
| | 383 | #endif /* GRAME02_LIFO_C_IMPLEMENTATION */ |
| | 384 | |
| | 385 | #if GRAME02_LIFO_GCC_IA32_INLINE_IMPLEMENTATION |
| | 386 | |
| | 387 | // would be nice if someone who knows IA32 assembler could review the above |
| | 388 | // for any obvious optimisations/reorderings before we convert to GCC format |
| | 389 | |
| | 390 | #endif /* GRAME02_LIFO_GCC_IA32_INLINE_IMPLEMENTATION */ |