Commit 37d4b27b authored by Schacht Birger's avatar Schacht Birger

initial commit

parents
Pipeline #618 failed with stages
a.out
testdata
*.o
*.pdf
*0
# Binaries produced by `all`: one per BUFSIZE value (named after the value)
# plus the default a.out.
EXECUTABLES=10 100 1000 10000 a.out
# First rule in the file, hence the default goal. BUFSIZE is not passed here,
# so main.c falls back to its own #ifndef default.
a.out: timelib.o main.c
gcc timelib.o main.c -Wall -lpthread -lcrypto
# Build every benchmark variant.
all: $(EXECUTABLES)
# Timing helper object shared by all binaries.
timelib.o: timelib.h timelib.c
gcc -Wall -g -c timelib.c -o timelib.o
# Remove objects, binaries, generated test data and the rendered PDF.
clean:
rm -rf *.o $(EXECUTABLES) testdata *.pdf
# The following four rules are identical: the target name ($@) is used both as
# the value of -DBUFSIZE and as the name of the output binary.
10: timelib.o main.c
gcc timelib.o -Wall -DBUFSIZE=$@ main.c -lpthread -lcrypto -o $@
100: timelib.o main.c
gcc timelib.o -Wall -DBUFSIZE=$@ main.c -lpthread -lcrypto -o $@
1000: timelib.o main.c
gcc timelib.o -Wall -DBUFSIZE=$@ main.c -lpthread -lcrypto -o $@
10000: timelib.o main.c
gcc timelib.o -Wall -DBUFSIZE=$@ main.c -lpthread -lcrypto -o $@
# Build all binaries, then run the benchmark script.
test: a.out 10 100 1000 10000
./test.sh
# Render the written report a2.md to a2.pdf with pandoc.
pdf: a2.md
pandoc -o a2.pdf a2.md
Je grösser der Puffer ist, desto schneller kann der Hash gebildet werden, da zwischen
den einzelnen Rechenschritten nicht ständig kleine Teile der Datei nachgelesen werden müssen.
4-Prozessor-Maschine
====================
# Buffersize 10
* BUFSIZE 10, THREADS 2: Time used for calculating md5 checksums is 0s, 159021927ns
* BUFSIZE 10, THREADS 2: Time used for reading the directory is 0s, 28566ns
* BUFSIZE 10, THREADS 2: Time used for reading files is 3s, 601142207ns
* BUFSIZE 10, THREADS 4: Time used for calculating md5 checksums is 0s, 178492703ns
* BUFSIZE 10, THREADS 4: Time used for reading the directory is 0s, 23310ns
* BUFSIZE 10, THREADS 4: Time used for reading files is 7s, 640661246ns
* BUFSIZE 10, THREADS 6: Time used for calculating md5 checksums is 0s, 144907648ns
* BUFSIZE 10, THREADS 6: Time used for reading the directory is 0s, 24078ns
* BUFSIZE 10, THREADS 6: Time used for reading files is 5s, 641948580ns
# Buffersize 100
* BUFSIZE 100, THREADS 2: Time used for calculating md5 checksums is 0s, 62619465ns
* BUFSIZE 100, THREADS 2: Time used for reading the directory is 0s, 23983ns
* BUFSIZE 100, THREADS 2: Time used for reading files is 0s, 172702335ns
* BUFSIZE 100, THREADS 4: Time used for calculating md5 checksums is 0s, 64821766ns
* BUFSIZE 100, THREADS 4: Time used for reading the directory is 0s, 27580ns
* BUFSIZE 100, THREADS 4: Time used for reading files is 0s, 174798439ns
* BUFSIZE 100, THREADS 6: Time used for calculating md5 checksums is 0s, 63313029ns
* BUFSIZE 100, THREADS 6: Time used for reading the directory is 0s, 30202ns
* BUFSIZE 100, THREADS 6: Time used for reading files is 0s, 179016787ns
# Buffersize 1000
* BUFSIZE 1000, THREADS 2: Time used for calculating md5 checksums is 0s, 56183227ns
* BUFSIZE 1000, THREADS 2: Time used for reading the directory is 0s, 21753ns
* BUFSIZE 1000, THREADS 2: Time used for reading files is 0s, 23828327ns
* BUFSIZE 1000, THREADS 4: Time used for calculating md5 checksums is 0s, 52800488ns
* BUFSIZE 1000, THREADS 4: Time used for reading the directory is 0s, 23231ns
* BUFSIZE 1000, THREADS 4: Time used for reading files is 0s, 23851912ns
* BUFSIZE 1000, THREADS 6: Time used for calculating md5 checksums is 0s, 51018539ns
* BUFSIZE 1000, THREADS 6: Time used for reading the directory is 0s, 21384ns
* BUFSIZE 1000, THREADS 6: Time used for reading files is 0s, 21186582ns
# Buffersize 10000
* BUFSIZE 10000, THREADS 2: Time used for calculating md5 checksums is 0s, 47794104ns
* BUFSIZE 10000, THREADS 2: Time used for reading the directory is 0s, 23724ns
* BUFSIZE 10000, THREADS 2: Time used for reading files is 0s, 5336740ns
* BUFSIZE 10000, THREADS 4: Time used for calculating md5 checksums is 0s, 50001772ns
* BUFSIZE 10000, THREADS 4: Time used for reading the directory is 0s, 55742ns
* BUFSIZE 10000, THREADS 4: Time used for reading files is 0s, 5480767ns
* BUFSIZE 10000, THREADS 6: Time used for calculating md5 checksums is 0s, 50603566ns
* BUFSIZE 10000, THREADS 6: Time used for reading the directory is 0s, 35421ns
* BUFSIZE 10000, THREADS 6: Time used for reading files is 0s, 5503741ns
/*
* uses errno.h for error reporting
*/
#define _POSIX_C_SOURCE 200809L
#include <errno.h>
#include <string.h> // strerror
#include <ctype.h>
#include <dirent.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <openssl/md5.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <limits.h> /* PATH_MAX */
#include "timelib.h"
#ifndef BUFSIZE
#define BUFSIZE 1024
#endif
/*
 * errno is provided by <errno.h>; it must not be redeclared here
 * (POSIX leaves a program-level declaration of errno undefined).
 */

/*
 * Accumulated time spent in readdir(), in read() and in the MD5 routines,
 * summed over all worker threads. Each total is only modified while the
 * matching *_time_mutex below is held.
 */
struct timespec readdir_time, readfiles_time, checksum_time;

/*
 * dirsync guards readdir() calls; the three *_time_mutex objects guard the
 * totals above. Every mutex gets its own static initializer: in a declarator
 * list an initializer applies only to the declarator it is attached to.
 */
pthread_mutex_t dirsync = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t readdir_time_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t readfiles_time_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t checksum_time_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Argument bundle for a worker: pthread_create() hands over a single
 * pointer, so the directory stream and its path travel together.
 */
struct vargs {
    DIR *pDir;      /* open directory stream the worker reads entries from */
    char *dirname;  /* path of that directory; prefix for the entry names */
};
typedef struct vargs t_vargs;
/*
 * Tell whether `path` names a regular file.
 * Returns non-zero for a regular file and 0 otherwise. A failing stat()
 * is reported through perror("Error") and also yields 0.
 */
int is_regular_file(const char *path)
{
    struct stat info;

    if (stat(path, &info) != -1) {
        return S_ISREG(info.st_mode);
    }
    perror("Error");
    return 0;
}
/*
 * Tell whether `path` names a directory.
 * Returns non-zero for a directory and 0 otherwise. A failing stat()
 * is reported through perror("Error") and also yields 0.
 */
int is_directory_file(const char *path)
{
    struct stat info;

    if (stat(path, &info) != -1) {
        return S_ISDIR(info.st_mode);
    }
    perror("Error");
    return 0;
}
/*
 * Write the upper-case hexadecimal representation of a byte array
 * (here: an MD5 digest) into `out` as a NUL-terminated string.
 *
 *   in    - bytes to convert; NULL is treated as an empty input
 *   insz  - number of bytes in `in`
 *   out   - destination buffer; nothing is written if NULL
 *   outsz - capacity of `out` in bytes, including room for the terminator
 *
 * The output is truncated to whole byte pairs when `out` is too small for
 * 2 * insz + 1 characters. The bounds check runs before each write, so the
 * buffer is never overrun, and the result is always terminated so it can be
 * printed with %s.
 */
void tohex(unsigned char * in, size_t insz, char * out, size_t outsz)
{
    static const char hex[] = "0123456789ABCDEF";
    size_t written = 0;

    if (out == NULL || outsz == 0) {
        return;
    }
    if (in != NULL) {
        /* written + 2 < outsz leaves space for two digits plus the NUL */
        for (size_t i = 0; i < insz && written + 2 < outsz; i++) {
            out[written++] = hex[(in[i] >> 4) & 0xF];
            out[written++] = hex[in[i] & 0xF];
        }
    }
    out[written] = '\0';
}
/*
 * Accumulate `addtime` into the global checksum_time total while holding
 * checksum_time_mutex.
 */
void add_checksum_time(const struct timespec addtime) {
    pthread_mutex_t *guard = &checksum_time_mutex;

    pthread_mutex_lock(guard);
    add_time_323(&checksum_time, &addtime);
    pthread_mutex_unlock(guard);
}
/*
 * Accumulate `addtime` into the global readdir_time total while holding
 * readdir_time_mutex.
 */
void add_readdir_time(const struct timespec addtime) {
    pthread_mutex_t *guard = &readdir_time_mutex;

    pthread_mutex_lock(guard);
    add_time_323(&readdir_time, &addtime);
    pthread_mutex_unlock(guard);
}
/*
 * Accumulate `addtime` into the global readfiles_time total while holding
 * readfiles_time_mutex.
 */
void add_readfiles_time(const struct timespec addtime) {
    pthread_mutex_t *guard = &readfiles_time_mutex;

    pthread_mutex_lock(guard);
    add_time_323(&readfiles_time, &addtime);
    pthread_mutex_unlock(guard);
}
/*
 * Calculate the MD5 hash of the file at `path`.
 *
 * The file is read in chunks of BUFSIZE bytes. Every read() and every
 * OpenSSL MD5 call is bracketed by two timestamps whose difference is added
 * to the global readfiles/checksum totals.
 *
 * Returns a malloc()ed buffer of MD5_DIGEST_LENGTH bytes that the caller
 * must free(), or NULL if the allocation, open() or read() fails (the
 * failure is reported with perror("Error")).
 */
unsigned char *md5(const char *path)
{
    unsigned char data[BUFSIZE];
    struct timespec t_start, t_end;
    MD5_CTX mdContext;

    unsigned char *digest = malloc(MD5_DIGEST_LENGTH);
    if (digest == NULL) {
        perror("Error");
        return NULL;
    }

    int fd = open(path, O_RDONLY);
    if (fd == -1) {
        /* do not fall through and read from an invalid descriptor */
        perror("Error");
        free(digest);
        return NULL;
    }

    t_start = get_cur_time_323();
    MD5_Init(&mdContext);
    t_end = get_cur_time_323();
    add_checksum_time(get_diff_323(&t_start, &t_end));

    for (;;) {
        t_start = get_cur_time_323();
        ssize_t bytes = read(fd, data, sizeof data);
        t_end = get_cur_time_323();
        add_readfiles_time(get_diff_323(&t_start, &t_end));

        if (bytes < 0) {
            if (errno == EINTR) {
                continue;   /* interrupted before any data arrived: retry */
            }
            /* never hand a negative length to MD5_Update() */
            perror("Error");
            close(fd);
            free(digest);
            return NULL;
        }
        if (bytes == 0) {
            break;          /* end of file */
        }

        t_start = get_cur_time_323();
        MD5_Update(&mdContext, data, (size_t)bytes);
        t_end = get_cur_time_323();
        add_checksum_time(get_diff_323(&t_start, &t_end));
    }
    close(fd);

    t_start = get_cur_time_323();
    MD5_Final(digest, &mdContext);
    t_end = get_cur_time_323();
    add_checksum_time(get_diff_323(&t_start, &t_end));

    return digest;
}
/* the function that is called by the threads
* started in the main function
*/
void *work_323(void *vargp)
{
t_vargs *args = (t_vargs*)vargp;
struct dirent *pDirent;
struct timespec readdir_time_start, readdir_time_end;
/* locking the dirsync mutex and starting the
* time counter, in a manner of speaking
*/
pthread_mutex_lock(&dirsync);
readdir_time_start = get_cur_time_323();
// set errno to 0 because so we can test against it
errno = 0;
while ((pDirent = readdir(args->pDir)) != NULL) {
pthread_mutex_unlock(&dirsync);
readdir_time_end = get_cur_time_323();
add_readdir_time(get_diff_323(&readdir_time_start, &readdir_time_end));
if(strcmp(pDirent->d_name, ".") == 0 || strcmp(pDirent->d_name, "..") == 0)
continue;
// create a buffer for the whole filepath
char buf[PATH_MAX];
snprintf(buf, PATH_MAX, "%s/%s", args->dirname, pDirent->d_name);
// only calculate md5 hashes if the file is a regular file
if (is_regular_file(buf)) {
unsigned char *hash = md5(buf);
char str[3*MD5_DIGEST_LENGTH];
tohex(hash, MD5_DIGEST_LENGTH, str, 3*MD5_DIGEST_LENGTH);
printf("(Thread ID %li)\t%s\t%s\n", pthread_self(), str, buf);
} else if (is_directory_file(buf)) {
t_vargs *subdir = malloc(sizeof args);
subdir->pDir = opendir(buf);
subdir->dirname = buf;
if (args->pDir == NULL) {
printf("Cannot open subdirectory '%s'\n", buf);
} else {
work_323(subdir);
}
free(subdir);
}
// start the clock for readdir before we cycle back to
// the beginning of the loop
pthread_mutex_lock(&dirsync);
readdir_time_start = get_cur_time_323();
}
if (errno) {
perror("Error");
}
pthread_mutex_unlock(&dirsync);
return NULL;
}
int main(int argc, char *argv[])
{
// initialize our timecounters
readdir_time.tv_sec = 0;
readdir_time.tv_nsec = 0;
checksum_time.tv_sec = 0;
checksum_time.tv_nsec = 0;
readfiles_time.tv_sec = 0;
readfiles_time.tv_nsec = 0;
// define the struct for the arguments
// of the threads
t_vargs *args = malloc(sizeof args);
// maybe we should use getopt
if (argc != 3) {
printf("Usage: %s <dirname> <threads>\n", argv[0]);
exit(EXIT_FAILURE);
}
// if (1==sscanf(argv[1], "%d", &temp))
int numthreads;
sscanf(argv[2], "%d", &numthreads);
pthread_t threads[numthreads-1];
args->pDir = opendir(argv[1]);
if (args->pDir == NULL) {
printf("Cannot open directory '%s'\n", argv[1]);
exit(EXIT_FAILURE);
}
args->dirname = argv[1];
for (int i = 0; i < numthreads; i++) {
pthread_create(&threads[i], NULL, work_323, (void*)args);
}
for (int i = 0; i < numthreads; i++) {
pthread_join(threads[i], NULL);
}
closedir(args->pDir);
free(args);
// create useable and parseable messages for output
char checksum_msg[256], readdir_msg[256], readfiles_msg[256];
char *msg = "BUFSIZE %i, THREADS %i: %s";
snprintf(checksum_msg, 256, msg, BUFSIZE, numthreads, "Time used for calculating md5 checksums is");
snprintf(readdir_msg, 256, msg, BUFSIZE, numthreads, "Time used for reading the directory is\t");
snprintf(readfiles_msg, 256, msg, BUFSIZE, numthreads, "Time used for reading files is\t\t");
write_time_323(checksum_msg, &checksum_time);
write_time_323(readdir_msg, &readdir_time);
write_time_323(readfiles_msg, &readfiles_time);
/*
* destroy all the mutices
*/
pthread_mutex_destroy(&dirsync);
pthread_mutex_destroy(&readdir_time_mutex);
pthread_mutex_destroy(&readfiles_time_mutex);
pthread_mutex_destroy(&checksum_time_mutex);
exit(EXIT_SUCCESS);
}
#!/bin/bash
# Benchmark driver: creates random test files, then builds and times one
# binary per buffer size with several thread counts.
BUFSIZES='10 100 1000 10000'
THREADS='2 4 6'
blocksize=100000
count=1000

# Four test files, each written as ${count} blocks of ${blocksize} bytes.
mkdir -p testdata/
for n in $(seq 1 4); do
    dd if=/dev/urandom of="testdata/${count}-${blocksize}-${n}.raw" bs="${blocksize}" count="${count}" status=none
done

# The make target and the resulting binary are both named after the buffer
# size. The hash listing on stdout is discarded; only the timings remain.
for BUFSIZE in ${BUFSIZES}; do
    make "${BUFSIZE}"
    for nthreads in ${THREADS}; do
        time "./${BUFSIZE}" testdata "${nthreads}" > /dev/null
        printf "\n"
    done
done
#include "timelib.h"
/*
 * Return the current CLOCK_REALTIME reading.
 *
 * If clock_gettime() fails, the error is reported with perror() and a
 * zeroed timespec is returned instead of an indeterminate one.
 *
 * NOTE(review): the callers in this project only use the result to measure
 * intervals; CLOCK_MONOTONIC would be immune to wall-clock adjustments -
 * confirm nobody relies on wall-clock values before switching.
 */
struct timespec get_cur_time_323(void)
{
    struct timespec spec = {0};

    if (clock_gettime(CLOCK_REALTIME, &spec) != 0) {
        perror("clock_gettime");
    }
    return spec;
}
/*
 * Return the time elapsed between `beginning` and `end` as a timespec.
 * When the nanosecond part of `end` is smaller than that of `beginning`,
 * one second is borrowed so that tv_nsec stays non-negative.
 */
struct timespec get_diff_323(const struct timespec *beginning, const struct timespec *end)
{
    struct timespec delta;
    long nanos = end->tv_nsec - beginning->tv_nsec;

    delta.tv_sec = difftime(end->tv_sec, beginning->tv_sec);
    if (nanos < 0) {
        /* borrow one second */
        nanos += BILLION;
        delta.tv_sec--;
    }
    delta.tv_nsec = nanos;
    return delta;
}
/*
 * Print `msg` followed by the seconds and nanoseconds of `t` to stderr,
 * in the form "<msg>\t<sec>s,\t<nsec>ns".
 *
 * time_t is not guaranteed to be `long`, so tv_sec is converted explicitly
 * to match its format specifier; the printed text is unchanged.
 */
void write_time_323(const char *msg, const struct timespec *t)
{
    fprintf(stderr, "%s\t%llds,\t%ldns\n", msg, (long long)t->tv_sec, (long)t->tv_nsec);
}
/*
 * Add `addtime` to `sum` in place. If the nanosecond field reaches one
 * second (BILLION ns), that second is carried over into tv_sec.
 */
void add_time_323(struct timespec *sum, const struct timespec *addtime)
{
    sum->tv_sec = sum->tv_sec + addtime->tv_sec;
    sum->tv_nsec = sum->tv_nsec + addtime->tv_nsec;

    if (sum->tv_nsec < BILLION) {
        return;     /* no carry needed */
    }
    sum->tv_nsec -= BILLION;
    sum->tv_sec++;
}
/* Small helpers for taking, subtracting, summing and printing timespec values. */
#ifndef TIMELIB_H_
#define TIMELIB_H_
#include <stdio.h>
#include <time.h>
/* Nanoseconds per second; used to carry/borrow between tv_nsec and tv_sec. */
#define BILLION 1000000000
/* Return the current CLOCK_REALTIME reading. */
struct timespec get_cur_time_323();
/* Print `msg` followed by the seconds and nanoseconds of `t` to stderr. */
void write_time_323(const char *msg, const struct timespec *t);
/* Return the time elapsed from `a` (beginning) to `b` (end). */
struct timespec get_diff_323(const struct timespec *a, const struct timespec *b);
/* Add `b` to the running total `a` in place, carrying nanoseconds into seconds. */
void add_time_323(struct timespec *a, const struct timespec *b);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment