Computer Systems Fundamentals



A simple example which launches two threads of execution.
$ gcc -pthread two_threads.c -o two_threads $ ./two_threads | more Hello this is thread #1 i=0 Hello this is thread #1 i=1 Hello this is thread #1 i=2 Hello this is thread #1 i=3 Hello this is thread #1 i=4 Hello this is thread #2 i=0 Hello this is thread #2 i=1 ...

#include <pthread.h>
#include <stdio.h>

// This function is called to start thread execution.
// It can be given any pointer as an argument.
void *run_thread(void *argument) {
    int *p = argument;

    for (int i = 0; i < 10; i++) {
        printf("Hello this is thread #%d: i=%d\n", *p, i);
    }

    // A thread finishes when either the thread's start function
    // returns, or the thread calls `pthread_exit(3)'.
    // A thread can return a pointer of any type --- that pointer
    // can be fetched via `pthread_join(3)'
    return NULL;
}

int main(void) {
    // Create two threads running the same task, but different inputs.

    pthread_t thread_id1;
    int thread_number1 = 1;
    pthread_create(&thread_id1, NULL, run_thread, &thread_number1);

    pthread_t thread_id2;
    int thread_number2 = 2;
    pthread_create(&thread_id2, NULL, run_thread, &thread_number2);

    // Wait for the 2 threads to finish.
    pthread_join(thread_id1, NULL);
    pthread_join(thread_id2, NULL);

    return 0;
}


Simple example of running an arbitrary number of threads.
For example::
$ gcc -pthread n_threads.c -o n_threads $ ./n_threads 10 Hello this is thread 0: i=0 Hello this is thread 0: i=1 Hello this is thread 0: i=2 Hello this is thread 0: i=3 Hello this is thread 0: i=4 Hello this is thread 0: i=5 Hello this is thread 0: i=6 Hello this is thread 0: i=7 ...

#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

void *run_thread(void *argument) {
    int *p = argument;

    for (int i = 0; i < 42; i++) {
        printf("Hello this is thread %d: i=%d\n", *p, i);
    }
    return NULL;
}

int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <n-threads>\n", argv[0]);
        return 1;
    }

    int n_threads = strtol(argv[1], NULL, 0);
    assert(0 < n_threads && n_threads < 100);

    pthread_t thread_id[n_threads];
    int argument[n_threads];

    for (int i = 0; i < n_threads; i++) {
        argument[i] = i;
        pthread_create(&thread_id[i], NULL, run_thread, &argument[i]);
    }

    // Wait for the threads to finish
    for (int i = 0; i < n_threads; i++) {
        pthread_join(thread_id[i], NULL);
    }

    return 0;
}


Simple example which launches two threads of execution, but which demonstrates the perils of accessing non-local variables from a thread.
$ gcc -pthread two_threads_broken.c -o two_threads_broken $ ./two_threads_broken|more Hello this is thread 2: i=0 Hello this is thread 2: i=1 Hello this is thread 2: i=2 Hello this is thread 2: i=3 Hello this is thread 2: i=4 Hello this is thread 2: i=5 Hello this is thread 2: i=6 Hello this is thread 2: i=7 Hello this is thread 2: i=8 Hello this is thread 2: i=9 Hello this is thread 2: i=0 Hello this is thread 2: i=1 Hello this is thread 2: i=2 Hello this is thread 2: i=3 Hello this is thread 2: i=4 Hello this is thread 2: i=5 Hello this is thread 2: i=6 Hello this is thread 2: i=7 Hello this is thread 2: i=8 Hello this is thread 2: i=9 $...

#include <pthread.h>
#include <stdio.h>

void *run_thread(void *argument) {
    int *p = argument;

    for (int i = 0; i < 10; i++) {
        // variable thread_number will probably have changed in main
        // before execution reaches here
        printf("Hello this is thread %d: i=%d\n", *p, i);
    }

    return NULL;
}

int main(void) {
    pthread_t thread_id1;
    int thread_number = 1;
    pthread_create(&thread_id1, NULL, run_thread, &thread_number);

    thread_number = 2;
    pthread_t thread_id2;
    pthread_create(&thread_id2, NULL, run_thread, &thread_number);

    pthread_join(thread_id1, NULL);
    pthread_join(thread_id2, NULL);
    return 0;
}


Simple example of dividing a task between `n' threads.

Compile like:
$ gcc -O3 -pthread thread_sum.c -o thread_sum

One thread takes 10 seconds:
$ time ./thread_sum 1 10000000000 Creating 1 threads to sum the first 10000000000 integers Each thread will sum 10000000000 integers Thread summing integers 0 to 10000000000 finished sum is 49999999990067863552
Combined sum of integers 0 to 10000000000 is 49999999990067863552
real 0m11.924s user 0m11.919s sys 0m0.004s $

Four threads runs 4x as fast on a machine with 4 cores:
$ time ./thread_sum 4 10000000000 Creating 4 threads to sum the first 10000000000 integers Each thread will sum 2500000000 integers Thread summing integers 2500000000 to 5000000000 finished sum is 9374999997502005248 Thread summing integers 7500000000 to 10000000000 finished sum is 21874999997502087168 Thread summing integers 5000000000 to 7500000000 finished sum is 15624999997500696576 Thread summing integers 0 to 2500000000 finished sum is 3124999997567081472
Combined sum of integers 0 to 10000000000 is 49999999990071869440
real 0m3.154s user 0m12.563s sys 0m0.004s $

Note the result is inexact, because we use values can't be exactly represented as double and exact value printed depends on how many threads we use - because we break up the computation differently depending on the number of threads.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct job {
    long start, finish;
    double sum;
};

void *run_thread(void *argument) {
    struct job *j = argument;
    long start = j->start;
    long finish = j->finish;
    double sum = 0;

    for (long i = start; i < finish; i++) {
        sum += i;
    }

    j->sum = sum;

    printf("Thread summing integers %10lu to %11lu finished sum is %20.0f\n",
           start, finish, sum);
    return NULL;
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <n-threads> <n-integers-to-sum>\n", argv[0]);
        return 1;
    }

    int n_threads = strtol(argv[1], NULL, 0);
    assert(0 < n_threads && n_threads < 1000);
    long integers_to_sum = strtol(argv[2], NULL, 0);
    assert(0 < integers_to_sum);

    long integers_per_thread = (integers_to_sum - 1) / n_threads + 1;

    printf("Creating %d threads to sum the first %lu integers\n"
           "Each thread will sum %lu integers\n",
           n_threads, integers_to_sum, integers_per_thread);

    pthread_t thread_id[n_threads];
    struct job jobs[n_threads];

    for (int i = 0; i < n_threads; i++) {
        jobs[i].start = i * integers_per_thread;
        jobs[i].finish = jobs[i].start + integers_per_thread;

        if (jobs[i].finish > integers_to_sum) {
            jobs[i].finish = integers_to_sum;
        }

        // create a thread which will sum integers_per_thread integers
        pthread_create(&thread_id[i], NULL, run_thread, &jobs[i]);
    }

    // Wait for threads to finish, then add results for an overall sum.
    double overall_sum = 0;
    for (int i = 0; i < n_threads; i++) {
        pthread_join(thread_id[i], NULL);
        overall_sum += jobs[i].sum;
    }

    printf("\nCombined sum of integers 0 to %lu is %.0f\n", integers_to_sum,
           overall_sum);
    return 0;
}


Simple example demonstrating unsafe access to a global variable from threads.
$ gcc -O3 -pthread bank_account_broken.c -o bank_account_broken $ ./bank_account_broken Andrew's bank account has $108829 $

#define _POSIX_C_SOURCE 199309L

#include <pthread.h>
#include <stdio.h>
#include <time.h>

int bank_account = 0;

// add $1 to Andrew's bank account 100,000 times
void *add_100000(void *argument) {
    for (int i = 0; i < 100000; i++) {
        // execution may switch threads in middle of assignment
        // between load of variable value
        // and store of new variable value
        // changes other thread makes to variable will be lost
        nanosleep(&(struct timespec){ .tv_nsec = 1 }, NULL);

        // RECALL: shorthand for `bank_account = bank_account + 1`
        bank_account++;
    }

    return NULL;
}

int main(void) {
    // create two threads performing the same task

    pthread_t thread_id1;
    pthread_create(&thread_id1, NULL, add_100000, NULL);

    pthread_t thread_id2;
    pthread_create(&thread_id2, NULL, add_100000, NULL);

    // wait for the 2 threads to finish
    pthread_join(thread_id1, NULL);
    pthread_join(thread_id2, NULL);

    // will probably be much less than $200000
    printf("Andrew's bank account has $%d\n", bank_account);
    return 0;
}


Simple example demonstrating safe access to a global variable from threads, using a mutex (mutual exclusion) lock
$ gcc -O3 -pthread bank_account_mutex.c -o bank_account_mutex $ ./bank_account_mutex Andrew's bank account has $200000 $

#include <pthread.h>
#include <stdio.h>

int bank_account = 0;

pthread_mutex_t bank_account_lock = PTHREAD_MUTEX_INITIALIZER;

// add $1 to Andrew's bank account 100,000 times
void *add_100000(void *argument) {
    for (int i = 0; i < 100000; i++) {
        pthread_mutex_lock(&bank_account_lock);

        // only one thread can execute this section of code at any time

        bank_account = bank_account + 1;

        pthread_mutex_unlock(&bank_account_lock);
    }

    return NULL;
}

int main(void) {
    // create two threads performing  the same task

    pthread_t thread_id1;
    pthread_create(&thread_id1, NULL, add_100000, NULL);

    pthread_t thread_id2;
    pthread_create(&thread_id2, NULL, add_100000, NULL);

    // wait for the 2 threads to finish
    pthread_join(thread_id1, NULL);
    pthread_join(thread_id2, NULL);

    // will always be $200000
    printf("Andrew's bank account has $%d\n", bank_account);
    return 0;
}
! simple example which launches two threads of execution ! which increment a global variable
#include <pthread.h>
#include <stdio.h>

int andrews_bank_account = 200;
pthread_mutex_t andrews_bank_account_lock = PTHREAD_MUTEX_INITIALIZER;

int zacs_bank_account = 100;
pthread_mutex_t zacs_bank_account_lock = PTHREAD_MUTEX_INITIALIZER;

// Andrew sends Zac all his money dollar by dollar
void *andrew_send_zac_money(void *argument) {
    for (int i = 0; i < 100000; i++) {
        pthread_mutex_lock(&andrews_bank_account_lock);
        pthread_mutex_lock(&zacs_bank_account_lock);

        if (andrews_bank_account > 0) {
            andrews_bank_account--;
            zacs_bank_account++;
        }

        pthread_mutex_unlock(&zacs_bank_account_lock);
        pthread_mutex_unlock(&andrews_bank_account_lock);
    }

    return NULL;
}

// Zac sends Andrew all his money dollar by dollar
void *zac_send_andrew_money(void *argument) {
    for (int i = 0; i < 100000; i++) {
        pthread_mutex_lock(&zacs_bank_account_lock);
        pthread_mutex_lock(&andrews_bank_account_lock);

        if (zacs_bank_account > 0) {
            zacs_bank_account--;
            andrews_bank_account++;
        }

        pthread_mutex_unlock(&andrews_bank_account_lock);
        pthread_mutex_unlock(&zacs_bank_account_lock);
    }

    return NULL;
}

int main(void) {
    // create two threads sending each other money

    pthread_t thread_id1;
    pthread_create(&thread_id1, NULL, andrew_send_zac_money, NULL);

    pthread_t thread_id2;
    pthread_create(&thread_id2, NULL, zac_send_andrew_money, NULL);

    // threads will probably never finish
    // deadlock will likely likely occur
    // with one thread holding  andrews_bank_account_lock
    // and waiting for zacs_bank_account_lock
    // and the other thread holding  zacs_bank_account_lock
    // and waiting for andrews_bank_account_lock

    pthread_join(thread_id1, NULL);
    pthread_join(thread_id2, NULL);

    return 0;
}


Simple example demonstrating safe access to a global variable from threads, using atomics
$ gcc -O3 -pthread bank_account_atomic.c -o bank_account_atomic $ ./bank_account_atomic Andrew's bank account has $200000 $

#include <pthread.h>
#include <stdio.h>
#include <stdatomic.h>

atomic_int bank_account = 0;

// add $1 to Andrew's bank account 100,000 times
void *add_100000(void *argument) {
    for (int i = 0; i < 100000; i++) {
        // NOTE: This *cannot* be `bank_account = bank_account + 1`,
        // as that will not be atomic!
        // However, `bank_account++` would be okay
        // and,     `atomic_fetch_add(&bank_account, 1)` would also be okay
        bank_account += 1;
    }

    return NULL;
}

int main(void) {
    // create two threads performing  the same task

    pthread_t thread_id1;
    pthread_create(&thread_id1, NULL, add_100000, NULL);

    pthread_t thread_id2;
    pthread_create(&thread_id2, NULL, add_100000, NULL);

    // wait for the 2 threads to finish
    pthread_join(thread_id1, NULL);
    pthread_join(thread_id2, NULL);

    // will always be $200000
    printf("Andrew's bank account has $%d\n", bank_account);
    return 0;
}
! A small program that demonstrates lifetime issues ! with respect to differing threads ! ! When compiled with `dcc`: ! $ dcc -pthread thread_data_broken.c -o thread_data_broken ! $ ./thread_data_broken ! Hello there! Please patiently wait for your number! ! The number is 0x6c6c6548! ! ! Note that the 0x6c6c6548 value can easily change between ! compilers, platforms, or even individual executions. ! In this case, 0x6c6c6548 is the four ASCII bytes: ! 'l', 'l', 'e', 'H' -- the first four letters of "Hello there..." ! in little-endian byte ordering ! ! Curiously, the correct answer will occasionally appear: ! $ ./thread_data_broken ! Hello there! Please patiently wait for your number! ! The number is 0x42! !
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>

void *my_thread(void *data) {
    int number = *(int *)data;

    sleep(1);
    // should print 0x42, probably won't
    printf("The number is 0x%x!\n", number);

    return NULL;
}

pthread_t create_thread(void) {
    int super_special_number = 0x42;

    pthread_t thread_handle;
    pthread_create(&thread_handle, NULL, my_thread, &super_special_number);
    // super_special_number is destroyed when create_thread returns
    // but the thread just created may still be running and access it
    return thread_handle;
}

/// This function is entirely innocent but calling it below
/// (probably) makes the bug in create_thread obvious by changing stack memory
void say_hello(void) {
    char greeting[] = "Hello there! Please patiently wait for your number!\n";
    printf("%s", greeting);
}

int main(void) {
    pthread_t thread_handle = create_thread();

    say_hello();

    pthread_join(thread_handle, NULL);
}
! A potential solution to the issue in thread_data_broken.c ! ! This program uses a heap allocation to make sure that ! the memory will not be deallocated before the thread ! has a chance to read it. ! ! This in turn means that the thread is responsible for freeing ! the passed-in data.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>

void *my_thread(void *data) {
    int number = *(int *)data;

    sleep(1);
    printf("The number is 0x%x!\n", number);

    free(data);
    return NULL;
}

pthread_t function_creates_thread(void) {
    int *super_special_number = malloc(sizeof(int));
    *super_special_number = 0x42;

    pthread_t thread_handle;
    pthread_create(&thread_handle, NULL, my_thread, super_special_number);

    return thread_handle;
}

void function_says_hello(void) {
    char greeting[] = "Hello there! Please patiently wait for your number!\n";
    printf("%s", greeting);
}

int main(void) {
    pthread_t thread_handle = function_creates_thread();

    function_says_hello();

    pthread_join(thread_handle, NULL);
}
! A potential solution to the issue in thread_data_broken.c ! ! This program does not need an allocation in order to fix ! the previous lifetime issue. Instead it makes sure the thread ! has a chance to read the memory (and copy it into its own stack) ! before it is deallocated, by using a barrier. ! ! The barrier is initialised with a value of 2: ! - One for the caller thread, so it doesn't deallocate the memory immediately ! - One for the called thread, so it can signal when it has copied the memory ! ! Performance in execution speed is incredibly similar to malloc-version, ! but does not rely on additional allocation, which is an occasional ! real-world constraint. ! ! NOTE: `dcc` tends to not like this example code ! try running with `clang` or `gcc` instead.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>

struct thread_data {
    pthread_barrier_t *barrier;
    int number;
};

void *my_thread(void *data) {
    struct thread_data *thread_data = (struct thread_data *)data;
    int number = thread_data->number;
    pthread_barrier_wait(thread_data->barrier);

    sleep(1);
    printf("The number is 0x%x!\n", number);

    return NULL;
}

pthread_t function_creates_thread(void) {
    pthread_barrier_t barrier;
    pthread_barrier_init(&barrier, NULL, 2);

    struct thread_data data = {
        .barrier = &barrier,
        .number = 0x42,
    };

    pthread_t thread_handle;
    pthread_create(&thread_handle, NULL, my_thread, &data);

    pthread_barrier_wait(&barrier);

    return thread_handle;
}

void function_says_hello(void) {
    char greeting[] = "Hello there! Please patiently wait for your number!\n";
    printf("%s", greeting);
}

int main(void) {
    pthread_t thread_handle = function_creates_thread();

    function_says_hello();

    pthread_join(thread_handle, NULL);
}
! An example demonstrating the performance discrepancy ! between a Mutex guarding an unsigned int vs a single atomic_uint. ! ! Compile like: ! `clang -pthread -D <PERF_USE_NONE|PERF_USE_MUTEX|PERF_USE_ATOMIC> mutex_atomic_perf.c -o mutex_atomic_perf` ! ! It will then build the code using the preferred method of synchronisation. ! Note that *no synchronisation* will almost certainly give an incorrect sum -- this is to be expected.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <stdatomic.h>

// Make sure at-most one flag has been provided,
// otherwise we will have a compiler error
#ifdef PERF_USE_NONE
#ifdef PERF_USE_MUTEX
#error "only *one* option can be specified"
#endif // PERF_USE_MUTEX
#ifdef PERF_USE_ATOMIC
#error "only *one* option can be specified"
#endif // PERF_USE_ATOMIC
#endif // PERF_USE_NONE
#ifdef PERF_USE_MUTEX
#ifdef PERF_USE_ATOMIC
#error "only *one* option can be specified"
#endif // PERF_USE_ATOMIC
#endif // PERF_USE_MUTEX

// If no flag has been provided, default to NONE
#ifndef PERF_USE_NONE
#ifndef PERF_USE_MUTEX
#ifndef PERF_USE_ATOMIC
#define PERF_USE_NONE
#endif // PERF_USE_ATOMIC
#endif // PERF_USE_MUTEX
#endif // PERF_USE_NONE

// No synchronisation -- just a plain `unsigned int`
#ifdef PERF_USE_NONE
unsigned int count = 0;
#endif // PERF_USE_NONE

// Mutex synchronisation -- a lock and a plain `unsigned int`
//   that must be protected behind the lock
#ifdef PERF_USE_MUTEX
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
unsigned int count = 0;
#endif // PERF_USE_MUTEX

// Atomic synchronisation -- an `atomic_uint`
#ifdef PERF_USE_ATOMIC
atomic_uint count = 0;
#endif // PERF_USE_ATOMIC

void *thread(void *data) {
    unsigned long n_increments = *(unsigned long *)data;

#ifdef PERF_USE_NONE
    // If no synchronisation, just increment the count
    for (int i = 0; i < n_increments; i++) {
        count++;
    }
#endif // PERF_USE_NONE

#ifdef PERF_USE_MUTEX
    // If mutex synchronisation, acquire the
    // lock, increment the count, then release
    // the lock
    for (int i = 0; i < n_increments; i++) {
        pthread_mutex_lock(&lock);
        count++;
        pthread_mutex_unlock(&lock);
    }
#endif // PERF_USE_MUTEX

#ifdef PERF_USE_ATOMIC
    // If atomic synchronisation, just
    // increment -- the synchronisation is
    // handled automagically for us!
    for (int i = 0; i < n_increments; i++) {
        count++;
    }
#endif // PERF_USE_ATOMIC

    return NULL;
}

int main(int argc, char *argv[]) {
    if (!argv[0] || !argv[1] || !argv[2]) {
        fprintf(stderr, "Usage: %s <n threads> <n increments per thread>\n",
                argv[0]);
        return EXIT_FAILURE;
    }

    unsigned long n_threads = strtoul(argv[1], NULL, 0);
    unsigned long n_increments = strtoul(argv[2], NULL, 0);

    pthread_t *threads = malloc(n_threads * sizeof(pthread_t));
    for (int i = 0; i < n_threads; i++) {
        pthread_create(&threads[i], NULL, thread, &n_increments);
    }

    for (int i = 0; i < n_threads; i++) {
        pthread_join(threads[i], NULL);
    }

    printf("Final result: %d\n", (int)count);
}