From 4216b9b8737b2bad9689b721b9e238456c0d4053 Mon Sep 17 00:00:00 2001
From: tomsmeding
Date: Sun, 24 Nov 2019 00:04:06 +0100
Subject: static: MIME type detection using 'xdg-mime' or 'file'

---
 plugins/static/hashtable.h | 138 ++++++++++++++++++++++++++
 plugins/static/mime.c      | 243 +++++++++++++++++++++++++++++++++++++++++++++
 plugins/static/mime.h      |  11 +++
 plugins/static/static.c    |   8 +-
 4 files changed, 399 insertions(+), 1 deletion(-)
 create mode 100644 plugins/static/hashtable.h
 create mode 100644 plugins/static/mime.c
 create mode 100644 plugins/static/mime.h

diff --git a/plugins/static/hashtable.h b/plugins/static/hashtable.h
new file mode 100644
index 0000000..670eac6
--- /dev/null
+++ b/plugins/static/hashtable.h
@@ -0,0 +1,138 @@
+#pragma once
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "memory.h"
+
+
+// Rotate-and-xor string hash. Declared 'static inline' so that every
+// translation unit including this header gets its own definition.
+static inline size_t string_hash_func(const char *key) {
+	const unsigned bits = sizeof(size_t) * CHAR_BIT;  // don't assume 64-bit size_t
+	size_t h = 0;
+	for (; *key; key++) {
+		// Via unsigned char, so bytes >= 0x80 don't sign-extend into the high bits
+		h = ((h << 9) | (h >> (bits - 9))) ^ (unsigned char)*key;
+	}
+	return h;
+}
+
+// API:
+//   table prefix_make()
+//   void prefix_destroy(table)
+//   value* prefix_find(table*, const char *key)
+//   void prefix_insert(table*, const char *key, value)
+//   bool prefix_erase(table*, const char *key)
+// The table owns copies of its keys; values are stored by value and anything
+// they point to stays owned by the caller.
+#define HASHTABLE_DEFINE(prefix_, value_t_) \
+	struct prefix_ ## internal_table_entry { \
+		char *key;  /* NULL marks an empty slot */ \
+		value_t_ value; \
+	}; \
+	\
+	struct prefix_ ## table { \
+		size_t size, load;  /* size is always a power of 2 */ \
+		struct prefix_ ## internal_table_entry *values; \
+	}; \
+	\
+	struct prefix_ ## table prefix_ ## make(void) { \
+		struct prefix_ ## table ht; \
+		ht.size = 8; \
+		ht.load = 0; \
+		ht.values = malloc(ht.size, struct prefix_ ## internal_table_entry); \
+		for (size_t i = 0; i < ht.size; i++) ht.values[i].key = NULL; \
+		return ht; \
+	} \
+	\
+	/* Frees the keys and the slot array, not whatever the values point to */ \
+	void prefix_ ## destroy(struct prefix_ ## table ht) { \
+		for (size_t i = 0; i < ht.size; i++) free(ht.values[i].key); \
+		free(ht.values); \
+	} \
+	\
+	/* Returns the slot holding key, or else the empty slot where it belongs */ \
+	size_t prefix_ ## internal_probe_index(const struct prefix_ ## table *ht, const char *key) { \
+		size_t h = string_hash_func(key) % ht->size; \
+		/* Quadratic probing with index h + 1/2 i + 1/2 i^2; starting the step */ \
+		/* at 1 visits every slot exactly once since size is a power of 2 */ \
+		for (size_t i = 1; i <= ht->size; i++) { \
+			if (!ht->values[h].key || strcmp(ht->values[h].key, key) == 0) return h; \
+			h = (h + i) % ht->size; \
+		} \
+		assert(false);  /* cannot happen unless the table is full, which we don't allow */ \
+		abort();  /* don't fall off the end of the function under NDEBUG */ \
+	} \
+	\
+	void prefix_ ## internal_resize_table(struct prefix_ ## table *ht, size_t new_size); \
+	void prefix_ ## internal_grow_table(struct prefix_ ## table *ht); \
+	\
+	/* Returns pointer to value in table, or NULL if key is absent */ \
+	value_t_* prefix_ ## find(struct prefix_ ## table *ht, const char *key) { \
+		size_t idx = prefix_ ## internal_probe_index(ht, key); \
+		if (ht->values[idx].key) return &ht->values[idx].value; \
+		return NULL; \
+	} \
+	\
+	/* Like insert, but takes ownership of key (only if it was not yet present) */ \
+	void prefix_ ## insert_nostrdup(struct prefix_ ## table *ht, char *key, value_t_ value) { \
+		fprintf(stderr, "HT insert nostrdup load=%zu size=%zu\n", ht->load, ht->size); \
+		if (ht->load * 4 >= ht->size * 3) prefix_ ## internal_grow_table(ht); \
+		size_t idx = prefix_ ## internal_probe_index(ht, key); \
+		if (!ht->values[idx].key) { \
+			ht->values[idx].key = key; \
+			ht->load++; \
+		} \
+		ht->values[idx].value = value; \
+	} \
+	\
+	/* Overwrites the existing value if already present */ \
+	void prefix_ ## insert(struct prefix_ ## table *ht, const char *key, value_t_ value) { \
+		fprintf(stderr, "HT insert load=%zu size=%zu\n", ht->load, ht->size); \
+		if (ht->load * 4 >= ht->size * 3) prefix_ ## internal_grow_table(ht); \
+		size_t idx = prefix_ ## internal_probe_index(ht, key); \
+		if (!ht->values[idx].key) { \
+			ht->values[idx].key = strdup(key); \
+			ht->load++; \
+		} \
+		ht->values[idx].value = value; \
+	} \
+	\
+	/* Returns whether the element was indeed present */ \
+	bool prefix_ ## erase(struct prefix_ ## table *ht, const char *key) { \
+		size_t idx = prefix_ ## internal_probe_index(ht, key); \
+		if (!ht->values[idx].key) return false; \
+		free(ht->values[idx].key); \
+		ht->values[idx].key = NULL; \
+		ht->load--; \
+		fprintf(stderr, "HT erase, afterwards load=%zu size=%zu\n", ht->load, ht->size); \
+		/* An emptied slot would end the probe sequence of every key that */ \
+		/* was placed past it, making those keys unfindable; re-place all */ \
+		/* remaining entries so that lookups stay correct */ \
+		prefix_ ## internal_resize_table(ht, ht->size); \
+		return true; \
+	} \
+	\
+	/* Moves all entries into a fresh slot array of new_size (power of 2) slots */ \
+	void prefix_ ## internal_resize_table(struct prefix_ ## table *ht, size_t new_size) { \
+		struct prefix_ ## table ht2; \
+		ht2.size = new_size; \
+		ht2.load = 0; \
+		ht2.values = malloc(ht2.size, struct prefix_ ## internal_table_entry); \
+		for (size_t i = 0; i < ht2.size; i++) ht2.values[i].key = NULL; \
+		for (size_t i = 0; i < ht->size; i++) { \
+			if (ht->values[i].key) prefix_ ## insert_nostrdup(&ht2, ht->values[i].key, ht->values[i].value); \
+		} \
+		free(ht->values); \
+		*ht = ht2; \
+	} \
+	\
+	void prefix_ ## internal_grow_table(struct prefix_ ## table *ht) { \
+		fprintf(stderr, "HT grow load=%zu size=%zu\n", ht->load, ht->size); \
+		assert(ht->size * 2 != 0); \
+		prefix_ ## internal_resize_table(ht, ht->size * 2); \
+	}
diff --git a/plugins/static/mime.c b/plugins/static/mime.c
new file mode 100644
index 0000000..6be5078
--- /dev/null
+++ b/plugins/static/mime.c
@@ -0,0 +1,243 @@
+#include <ctype.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "memory.h"
+#include "util.h"
+#include "mime.h"
+#include "hashtable.h"
+#include "buffer.h"
+
+
+// Runs cmd (looked up in PATH) with the given NULL-terminated argv and returns
+// everything it wrote to stdout, NUL-terminated; the caller owns the result.
+// Returns NULL if the command could not be executed, was killed by a signal or
+// exited with a non-zero status.
+static char* process_read(const char *cmd, char **argv) {
+	// Exit status with which the child reports that it could not exec
+	const int error_exit_status = 97;
+
+	int outpipe[2];
+	if (pipe(outpipe) < 0) {
+		perror("pipe");
+		exit(1);
+	}
+
+	pid_t pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		exit(1);
+	}
+
+	if (pid == 0) {
+		close(outpipe[0]);
+		if (dup2(outpipe[1], STDOUT_FILENO) < 0) {
+			perror("dup2");
+			_exit(error_exit_status);
+		}
+		close(outpipe[1]);  // stdout now refers to the pipe, drop the extra descriptor
+		execvp(cmd, argv);
+		perror("execvp");
+		// _exit, not exit: the forked child must not run the parent's atexit
+		// handlers or flush its inherited stdio buffers
+		_exit(error_exit_status);
+	}
+
+	close(outpipe[1]);
+
+	struct buffer buffer = buffer_make(256);
+	char tempbuf[256];
+
+	while (true) {
+		ssize_t nr = read(outpipe[0], tempbuf, sizeof tempbuf);
+		if (nr < 0) {
+			if (errno == EINTR) continue;
+			perror("read");
+			break;
+		}
+
+		if (nr == 0) break;  // eof
+
+		buffer_append_mem(&buffer, tempbuf, nr);
+	}
+
+	close(outpipe[0]);
+
+	bool success = false;
+	while (true) {
+		int status;
+		if (waitpid(pid, &status, 0) < 0) {
+			if (errno == EINTR) continue;
+			perror("waitpid");
+			break;
+		}
+		if (!WIFEXITED(status)) {
+			// Without WUNTRACED/WCONTINUED the only alternative is death by signal
+			fprintf(stderr, "Command '%s' did not exit normally!\n", cmd);
+		} else if (WEXITSTATUS(status) == 0) {
+			success = true;
+		} else if (WEXITSTATUS(status) != error_exit_status) {
+			fprintf(stderr, "Command '%s' had unexpected exit code %d!\n", cmd, WEXITSTATUS(status));
+		}
+		break;
+	}
+
+	if (!success) {
+		buffer_free(buffer);  // every failure path releases the collected output
+		return NULL;
+	}
+
+	// The callers use the result as a C string. Whether struct buffer keeps its
+	// contents NUL-terminated is not visible from here, so terminate explicitly;
+	// an extra terminator is harmless.
+	char nul = '\0';
+	buffer_append_mem(&buffer, &nul, 1);
+	return buffer.buf;
+}
+
+// Returns the modification time of path, or {-1, -1} if it cannot be stat'ed.
+static struct timespec get_file_mtime(const char *path) {
+	struct stat st;
+	memset(&st, 0, sizeof st);
+	if (stat(path, &st) < 0) {
+		return (struct timespec){.tv_sec = -1, .tv_nsec = -1};
+	}
+
+#if defined(__linux__)  // the compiler predefines the lowercase spelling, not __LINUX__
+	return st.st_mtim;
+#elif defined(__APPLE__)
+	return st.st_mtimespec;
+#else
+#error Unknown operating system, how does the stat() data structure look?
+#endif
+}
+
+// Removes leading and trailing whitespace from str, in place.
+static void strip_string(char *str) {
+	size_t len = strlen(str);
+	size_t idx = 0;
+	// <ctype.h> functions need an unsigned char value; plain char may be negative
+	while (str[idx] && isspace((unsigned char)str[idx])) idx++;
+	if (idx > 0) memmove(str, str + idx, len - idx + 1);
+	len -= idx;
+	while (len > 0 && isspace((unsigned char)str[len - 1])) str[--len] = '\0';
+}
+
+struct cache_entry {
+	char *mimetype;
+	struct timespec mtime;  // mtime of the file when mimetype was detected
+};
+
+HASHTABLE_DEFINE(mht_, struct cache_entry)
+
+struct mime_storage {
+	struct mht_table *mime_ht;
+};
+static struct mime_storage global_storage = {NULL};
+static pthread_mutex_t global_storage_mutex;
+// atomic_flag is a typedef, there is no portable 'struct atomic_flag'
+static atomic_flag global_storage_inited = ATOMIC_FLAG_INIT;
+
+// Cannot be atomic_flag's because we need to load them without changing them
+static atomic_bool xdg_works = ATOMIC_VAR_INIT(true);
+static atomic_bool file_works = ATOMIC_VAR_INIT(true);
+
+#define WITH_MIME_STORAGE_LOCK(...) \
+	do { \
+		PTHREAD_CHECK(pthread_mutex_lock, &global_storage_mutex); \
+		{__VA_ARGS__}; \
+		PTHREAD_CHECK(pthread_mutex_unlock, &global_storage_mutex); \
+	} while (0)
+
+void mime_init(void) {
+	if (!atomic_flag_test_and_set(&global_storage_inited)) {
+		PTHREAD_CHECK(pthread_mutex_init, &global_storage_mutex, NULL);
+		global_storage.mime_ht = malloc(1, struct mht_table);
+		*global_storage.mime_ht = mht_make();
+	}
+}
+
+static char* mime_detect_xdg(const char *path) {
+	char *pathdup = strdup(path);
+	char *argv[5] = { "xdg-mime", "query", "filetype", pathdup, NULL };
+	char *output = process_read("xdg-mime", argv);
+	free(pathdup);
+	if (output) strip_string(output);
+	return output;
+}
+
+static char* mime_detect_file(const char *path) {
+	char *pathdup = strdup(path);
+	char *argv[6] = { "file", "--brief", "--dereference", "--mime-type", pathdup, NULL };
+	char *output = process_read("file", argv);
+	free(pathdup);
+	if (output) strip_string(output);
+	return output;
+}
+
+static char* mime_detect_perform(const char *path) {
+	// Try 'file' first, because xdg-mime calls that on my system
+	if (atomic_load(&file_works)) {
+		fprintf(stderr, "mime: trying 'file'...\n");
+		char *typ = mime_detect_file(path);
+		if (typ) return typ;
+		atomic_store(&file_works, false);
+		fprintf(stderr, "mime: 'file' doesn't work\n");
+	}
+
+	if (atomic_load(&xdg_works)) {
+		fprintf(stderr, "mime: trying 'xdg'...\n");
+		char *typ = mime_detect_xdg(path);
+		if (typ) return typ;
+		atomic_store(&xdg_works, false);
+		fprintf(stderr, "mime: 'xdg-mime' doesn't work\n");
+	}
+
+	fprintf(stderr, "No working mimetype detection! Please fix " __FILE__ " for this platform!\n");
+
+	return NULL;
+}
+
+const char* mime_detect(const char *path) {
+	struct timespec now_mtime = get_file_mtime(path);
+	if (now_mtime.tv_sec < 0) return NULL;
+
+	char *typ = NULL;
+
+	WITH_MIME_STORAGE_LOCK(
+		struct cache_entry *entry = mht_find(global_storage.mime_ht, path);
+		if (entry) {
+			// Any difference invalidates the entry: a file can also be replaced
+			// by one with an older timestamp. The old mimetype string is not
+			// freed because earlier callers may still be holding on to it.
+			if (now_mtime.tv_sec != entry->mtime.tv_sec ||
+					now_mtime.tv_nsec != entry->mtime.tv_nsec) {
+				fprintf(stderr, "Erasing <%s>\n", path);
+				mht_erase(global_storage.mime_ht, path);
+			} else {
+				typ = entry->mimetype;
+			}
+		}
+	);
+
+	if (typ) return typ;
+
+	typ = mime_detect_perform(path);
+	if (typ) {
+		struct cache_entry entry;
+		entry.mimetype = typ;
+		entry.mtime = now_mtime;
+		WITH_MIME_STORAGE_LOCK(
+			fprintf(stderr, "Inserting <%s> -> <%s>\n", path, typ);
+			mht_insert(global_storage.mime_ht, path, entry);
+		);
+	}
+	return typ;
+}
diff --git a/plugins/static/mime.h b/plugins/static/mime.h
new file mode 100644
index 0000000..c3641b5
--- /dev/null
+++ b/plugins/static/mime.h
@@ -0,0 +1,11 @@
+#pragma once
+
+
+// Must be called at least once in the application, and those calls must not be
+// simultaneous (because this function is not thread-safe itself).
+void mime_init(void);
+
+// Returns pointer to internal buffer, don't free. Returns NULL if the file
+// cannot be stat'ed or if no MIME type detection tool works.
+// This function is thread-safe.
+const char* mime_detect(const char *path);
diff --git a/plugins/static/static.c b/plugins/static/static.c
index 0b39afe..61cd259 100644
--- a/plugins/static/static.c
+++ b/plugins/static/static.c
@@ -9,6 +9,7 @@
 #include "plugin.h"
 #include "util.h"
 #include "buffer.h"
+#include "mime.h"
 
 
 // Will only be set once in the registration function, which is before any threading is done
@@ -110,7 +111,10 @@ static Handler_ret_t connection_handler(int sock, Headers *headers) {
 		}
 	}
 
-	buffer = build_response_headers("200 OK", "text/plain; charset=UTF-8", size);
+	const char *mimetype = mime_detect(path);
+	if (!mimetype) mimetype = "application/octet-stream";
+
+	buffer = build_response_headers("200 OK", mimetype, size);
 	if (!buffer_append_file(&buffer, path)) {
 		send_500(sock);
 		goto cleanup_return;
@@ -131,5 +135,7 @@ void plugin_register_yourself(register_callback_t callback) {
 		exit(1);
 	}
 
+	mime_init();
+
 	callback("static", &connection_handler);
 }
-- 
cgit v1.2.3