1: /* mutex.c -- Szymanski's mutual exclusion algorithm for shared memory and distributed processors. */
  2: /* rj@elilabs.com Sat Feb 19 16:14:15 EST 2011 */
  3: 
  4: /* This source file implements a practical version in the C programming language of Boleslaw Szymanski's
  5:    "Four-Bit Robust First-Come First-Served Algorithm" for mutual exclusion (mutex) for shared memory and
   distributed asynchronous processors, as described in Figure 5 of his paper "Mutual Exclusion Revisited",
  7:    published in "Proceedings of the Fifth Jerusalem Conference on Information Technology", Jerusalem, Israel,
  8:    October 1990, IEEE Computer Society Press, Los Alamitos, CA, pp. 110-117.  A copy of this paper is
  9:    available at http://cgi2.cs.rpi.edu/~szymansk/papers/jerus.93.pdf and is an important part of the
 10:    documentation for this source file.
 11: 
 12:    Szymanski's own proof in his paper above was only a sketch, and was not rigorous.  Szymanski's algorithm is
 13:    proven to properly implement mutual exclusion in the paper "Automated Verification of Szymanski's
 14:    Algorithm" by E. Pascal Gribomont and Guy Zenner, in "Lecture Notes in Computer Science", 1998, Volume
 15:    1384, "Tools and Algorithms for the Construction and Analysis of Systems", Pages 424-438, and other proofs
 16:    are also known.  A copy of this paper is at http://www.springerlink.com/content/3663621120865t34/, but
 17:    there is a charge.
 18: 
 19:    The implementation in this source file is intended to support multiple mutexes.  This implementation uses a
 20:    single 3-dimensional array to define all of the shared memory space needed, making it easy to use, as each
 21:    separate processor only needs to know the starting address of this single 3-D array.  This address is
 22:    stored as a pointer in a file-locally scoped static variable, and all accesses to the array are made by
 23:    means of a set of access macros.
 24: 
 25:    Since this mutex implementation uses spin-locks to wait for other tasks, it should only be used with
 26:    critical sections that are of relatively short execution time duration, such as to manipulate the links in
 27:    a linked list data structure, and not to perform lengthy operations.
 28: 
 29:    A task calls the acquire_mutex(task_num, mutex_num) routine with its pre-assigned task number and mutex
 30:    number to acquire that mutex.  The Critical Section is executed after calling and returning from
 31:    acquire_mutex(), and then, after the critical section has finished, release_mutex() is called, thus:
 32: 
 33:        acquire_mutex(my_task_number, mutex_number);
 34:        execute_critical_section();
 35:        release_mutex(my_task_number, mutex_number);
 36: 
 37:    This implementation using a 3-D array of flag bits assumes that any task could request any mutex.  It is
 38:    expected that there will be a relatively small number of tasks and mutexes, and so looping over all of the
 39:    tasks when acquiring a mutex will not impose too much of a performance penalty, but if there are a large
   number of tasks and mutexes, and disjoint sets of tasks that use different mutexes, or extreme performance
 41:    demands, then a different implementation with each mutex declared separately, and each mutex looping only
 42:    over those tasks that actually use that mutex, could provide an advantage.  */
 43: 
 44: 
/* WARNING This file *MUST NOT* be compiled with compiler optimization set high enough to cause re-ordering of
   the execution of instructions to be different from the order specified in the C source file, or the
   algorithm could fail to operate properly!  Typically, this would be -O2 or higher; -O1 should be safe.

   NOTE(review): the volatile qualifier on the flag array constrains the *compiler* from reordering or caching
   those accesses, but it does not prevent *hardware* (CPU / store-buffer) reordering; on multi-core targets
   explicit memory barriers (or C11 atomics) may also be required -- confirm for the target architecture. */
 48: 
 49: 
 50: #include "mutex.h"                                     /* This is the user tailorable configuration file. */
 51: 
 52: #define NULL 0                                             /* Make sure these are defined! */
 53: #define true 1
 54: #define false 0
 55: 
 56: static volatile flag_bits_t* flag_bits = NULL;       /* The 3-D array of mutex flag bits */
 57: 
 58: 
 59: /* Macros to access the flag_bits array */
 60: 
 61: #define active(task)   (*flag_bits)[mutex][task][a]  /* The task wants to enter the waiting room. */
 62: #define waiting(task)  (*flag_bits)[mutex][task][w]  /* The task is in the waiting room. */
 63: #define shutting(task) (*flag_bits)[mutex][task][s]  /* The task is shutting the door. */
 64: #define parity(task)   (*flag_bits)[mutex][task][p]  /* Used to prevent a task from entering the critical
 65:                                                         section twice before another wiating task has entered
 66:                                                         once. */
 67: 
 68: /* Macro to implement Szymanski's "*=*" reset operator. */
 69: 
 70: #define reset(var, val) if ((var) != (val)) (var) = (val)
 71: 
 72: 
 73: /*** Initialize a mutex. ***/
 74: 
 75: void initialize_mutex(task_num me, volatile flag_bits_t* shared_memory_ptr) {
 76:   int mutex;                                             /* loop index */
 77: 
 78:   flag_bits = shared_memory_ptr;                     /* point to the shared memory region */
 79: 
 80:   for (mutex = 0; mutex < NUM_MUTEXES; mutex++) {    /* initialize this task's own flag bits */
 81:     reset(active(me), false);
 82:     reset(waiting(me), false);
 83:     reset(shutting(me), false);
 84: 
 85:     /* The parity should not be reset.  See Szymanski's paper in the paragraph under Figure 5, where it
 86:        says, "It should be noted that, unlike the other communication variables, the variable p should not
 87:        be reset to initial value at the end of abortions." */
 88: 
 89:   }
 90:   return;
 91: }
 92: 
 93: 
 94: /*** Acquire a mutex. ***/
 95: 
 96: void acquire_mutex(task_num me,                       /* "me" is the calling task's number, and ranges from 0 to
 97:                                                   NUM_TASKS-1; these numbers are pre-assigned to each task
 98:                                                   needing to use a mutex.  This was denoted "i" in Szymanski's
 99:                                                   paper. */
100: 
101:                    mutex_num mutex) {               /* "mutex" is the number of the mutex being acquired.  Since
102:                                                   Szymanski's paper only implements a single mutex, the
103:                                                   "mutex" parameter has no counterpart in Szymanski's
104:                                                   implementation. */
105: 
106:   /* OVERVIEW: The algorithm is modeled on a waiting room with an entry and exit doorway. Initially the entry
107:      door is open and the exit door is closed. All processes which request entry into the critical section at
108:      roughly the same time enter the waiting room; the last of them closes the entry door and opens the exit
109:      door. The processes then enter the critical section one by one. The last process to leave the critical
110:      section closes the exit door and reopens the entry door, so the next batch of processes may enter.  
111:      [From http://en.wikipedia.org/wiki/Szymanski%27s_Algorithm] */
112: 
113:   int them;                                       /* Loop index to loop thru other tasks' flag bits.  This was
114:                                                   denoted "j" in Szymanski's paper. */
115: 
116:   int passes;                                       /* Secondary loop index, denoted "k" in Szymanski's paper. */
117: 
118:   int deadlock_counter = NUM_TASKS -1;               /* The deadlock counter, denoted "c" in Szymanski's paper. */
119: 
120:   /* These arrays are to remember the state of all the other tasks at the time this task enters the
121:      acquire_mutex() routine.  They are private to a single execution of acquire_mutex(), as they are
122:      allocated on the function's stack frame. */
123: 
124:   word last_active[NUM_TASKS];                       /* Denoted "la" in Szymanski's paper. */
125:   word last_parity[NUM_TASKS];                       /* Denoted "lp" in Szymanski's paper. */
126: 
127:   /* NOTE: The comments with 5 stars on each side correspond to the statement labels in Szymanski's paper.
128:      The paper uses these to discuss the various steps of the algorithm.  They are retained here to make it
129:      easier to follow the correspondence between Szymanski's implementation and this implementation. */
130: 
131:   /* NOTE: Since the calling task's task number is "me" in the parameter list, and the use of "me" is
132:      sometimes akward in the narrative, the terms "me" and "we" will be used interchangably in the
133:      comments. */
134: 
135:   /***** p1 *****/
136: 
137:   /* We want to enter the waiting room so we can acquire the mutex and execute our critical section. */
138: 
139:   active(me) = true;
140: 
141:   /***** g1 *****/
142: 
143:   /* We make a snapshot of all the other tasks' flag bits.  This will permit us to enter the critical section
144:      in FIFO order with respect to any other tasks that simultaneously request the same mutex.  */
145: 
146:   for (them = 0; them < NUM_TASKS; them++) {
147: 
148:     /***** g2 *****/
149: 
150:     last_active[them] = waiting(them);
151:     last_parity[them] = parity(them);
152:   }
153: 
154:   /***** g3 *****/
155: 
156:   /* Next, we flip our parity bit.  This is done by all tasks.  It will prevent the same task from entering
157:      the critical section twice while another task is exiting the waiting room and has not yet entered the
158:      critical section once.  */
159: 
160:   parity(me) = !parity(me);
161: 
162:   /***** g4 *****/
163: 
164:   /* Now we are ready to enter the waiting room. */
165: 
166:   waiting(me) = true;
167: 
168:   /***** p2 *****/
169: 
170:   /* Wait until nobody else is shutting the door to the waiting room.  We count the number of passes we make
171:      thru the other tasks, and we make n-1 passes, one pass for each other task besides ourself.  
172: 
173:      Szymanski's paper states: "Even a process in the exit state may fail to keep the door closed, if a
174:      shut-down or abortion takes place.  In the scenario in Figure 3, a process P1 is able to sneak through
175:      the door into the waiting room, although in the view of a process P2 the door was closed by the variable
176:      s either in the process P3 or the process P2. Please note, that the second check of values of the
177:      communication variables would succeed in discovering that the door is indeed closed. In general, k+1-st
178:      check of communication variables yields the proper status of the door in the presence of at most k
179:      abortions and shut-downs of leader processes. Aborted and shut-down processes cannot pass through the
180:      door until the processes remaining inside the waiting room exit it. Thus, checking values of the
181:      communication variables has to be done at most n-1 times to ensure the proper result."  */
182: 
183:   for (passes = 1; passes < NUM_TASKS; passes++) {
184:     for (them = 0; them < NUM_TASKS; them++) {
185: 
186:       /***** p3 *****/
187: 
188:       while(shutting(them) && me != them) {
189:         reset(active(me), true);
190:         reset(shutting(me), false);
191:       }
192:     }
193:   }
194: 
195:   /***** p4 *****/
196: 
197:   /* We repeat this while-loop until we start shutting the door.  We enter this loop with our active and
198:      waiting bits both set.  It is here in this loop that try to shut the door, but if any other tasks are
199:      trying to get in the waiting room, then we must not shut the door yet. */
200:   
201:   while (!shutting(me)) {
202:     reset(waiting(me), true);                       /* Robustly insure that our bits don't get clobbered. */
203:     reset(active(me), false);
204:       
205:     /***** p5 *****/
206: 
207:     /* Check to see if any other tasks are outside the waiting room, trying to get in.  If so, the for-loop
208:        will break out early with them < NUM_TASKS.  */
209:       
210:     for (them = 0; them < NUM_TASKS; them++) {
211:       if (active(them)) {
212:         break;
213:       }
214:     }
215:     
216:     /***** p6 *****/
217: 
218:     /* If no other tasks were outside the waiting room, then them == NUM_TASKS is true, and we start
219:        shutting the door. */
220:     
221:     if (them == NUM_TASKS) {
222:       shutting(me) = true;
223:         
224:       /***** p6.1 *****/
225: 
226:       /* Double check to see if someone tried to enter the waiting room before we could get the door
227:          totally shut. */
228:         
229:       for (them = 0; them < NUM_TASKS; them++) {
230:         if (active(them)) {
231:           break;
232:         }
233:       }
234:         
235:       /***** p6.2 *****/
236: 
237:       /* Were there any late coming tasks trying to enter the waiting room? */
238:         
239:       if (them < NUM_TASKS) {               /* Yes! */
240:         shutting(me) = false;               /* We can't shut the door yet after all. */
241:       }
242:         
243:       /***** p6.3a *****/
244:         
245:       else {                               /* No! */
246:         waiting(me) = false;               /* We can finalize the shutting of the door. */
247:       }
248:     }
249:     
250:     /***** p7 *****/
251:     
252:     /* This is subtle: them < NUM_TASKS is true either if the for-loop at p5 broke out early, meaning that
253:        there were other tasks active, outside the waiting room, or if the for-loop at p6.1 broke out early,
254:        meaning that at least one task tried to get in the waiting room after we started to shut the door.
255:        In either case, it means that there are tasks outside the door trying to get into the waiting room.
256:        If this is the case, then we check to see if any tasks are thru the waiting room and either executing
257:        their critical section, or waiting their turn to execute the critical section after others finish the
258:        critical section first. */
259: 
260:     if (them < NUM_TASKS) {
261:       for (them = 0; them < NUM_TASKS; them++) {
262:         if (!waiting(them) && shutting(them)) {
263:           break;
264:         }
265:       }
266:     }
267:     
268:     /***** p8 *****/
269: 
270:     /* If there were any other tasks trying to get into the waiting room besides ourself, then we try again
271:        to shut the door. */
272:     
273:     if (them != me && them < NUM_TASKS) {
274:       shutting(me) = true;
275:         
276:       /***** p8.1a *****/
277: 
278:       /* If any other tasks are not also shutting the door, then we cannot shut it either. */
279:         
280:       if (!shutting(them)) {
281:         shutting(me) = false;
282:       }
283:         
284:       /***** p8.1b *****/
285: 
286:       /* Otherwise, all the other tasks in the waiting room are shutting the door, so we definately do
287:          shut the door. */
288: 
289:       else {
290:         waiting(me) = false;
291:       }
292:     }
293:   } /* end of while-loop, starting at p4, to shut the door */
294: 
295:   /***** p6.4 *****/
296: 
297:   /* We wait until there are no other tasks in the waiting room. */
298: 
299:   for (them = 0; them < NUM_TASKS; them++) {
300:     while (waiting(them) && !active(them) && me != them) {
301:         
302:       /***** p6.4a *****/
303:         
304:       reset(active(me), false);               /* While we wait, we keep our bits from getting clobbered. */
305:       reset(waiting(me), false);
306:     }
307:   }
308:   
309:   /***** d1 *****/
310: 
311:   /* This do-while loop implements a deadlock detection mechanism.  If we loop here n-1 times, then we are
312:      deadlocked, and it is OK after that for us to enter the critical section anyway, bypassing the dead
313:      task. */
314: 
315:   do {
316:     
317:     /***** g5 *****/
318: 
319:     /* Force FIFO ordering: check to see if any other processes that were active before we became active are
320:        still trying to acquire the mutex; if so, we must wait for them to finish before we take our turn. */
321: 
322:     for (them = 0; them < NUM_TASKS; them++) {
323:       if (last_active[them] && parity(them) == last_parity[them] && shutting(them)) {
324:         break;
325:       }
326:     }
327:     
328:     /***** d2 *****/
329: 
330:     /* Did we break out early from the above for-loop?  If so, then we must wait for some other task to finish
331:        before we can proceed. */
332:     
333:     if (them < NUM_TASKS) {
334:       deadlock_counter--;                       /* Count that we had to wait. */
335:         
336:       /***** d2.1 *****/
337: 
338:       /* Wait for any process that is in line ahead of us in the advanced transient state (see the paper) to
339:          transit to the exit state. */
340:         
341:       for (them = 0; them < NUM_TASKS; them++) {
342:         while(!active(them) && shutting(them)) {
343:               
344:           /***** d2.2 *****/
345: 
346:           reset(active(me), them >= me); /* Keep our bits from getting clobbered. */
347:           reset(waiting(me), false);
348:         }
349:       }
350:         
351:       /***** d2.3 *****/
352: 
353:       /* Wait for any process that is in line ahead of us in the exit state to return the mutex after it has
354:          executed its critical section. */
355:         
356:       for (them = 0; them < NUM_TASKS; them++) {
357:         while (active(them) && shutting(them)) {
358:               
359:           /***** d2.4 *****/
360:               
361:           reset(active(me), them < me);  /* Keep our bits from getting clobbered. */
362:           reset(waiting(me), false);
363:         }
364:       }
365:     }
366:     
367:     /***** d3 *****/
368:     
369:   } while (them < NUM_TASKS && deadlock_counter > 0); /* Loop unless deadlocked. */
370:   
371:   /***** p9 *****/
372: 
373:   /* This is the final wait for anybody with a lower task number that is ahead of us to finish. */
374:   
375:   for (them = 0; them < me; them++) {
376: 
377:     /* The "me != them" term in the line below appears in Szymanski's step p9, but it is redundant, since
378:        "me" can never be equal to "them" because of the terminaton condition in the for-loop above.  While
379:        this is not an error, it is a needless inefficiency; therefore, it has been commented out in the
380:        present implementation. */
381: 
382:     while (!active(them) && shutting(them) /* && me != them */ ) {
383:         
384:       /***** p9a *****/
385:         
386:       reset(active(me), false);               /* Keep our bits from getting clobbered. */
387:       reset(waiting(me), false);
388:     }
389:   }
390:   
391:   return;
392: }
393: 
394: 
395: /* After a task returns from acquire_mutex(), it performs its critical section, then it calls release_mutex()
396:    to permit the next task desiring exclusive access to acquire the mutex so it can perform its critical
397:    section.  */
398: 
399: 
400: /*** Release a mutex. ***/
401: 
402: void release_mutex(task_num me, mutex_num mutex) {
403: 
404:   /***** e1 *****/
405: 
406:   shutting(me) = false;                               /* Return my mutex and give the next task a turn. */
407: 
408:   return;
409: }
410: