/*
 * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
 * All rights reserved.
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <sys/time.h>

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <strings.h>
#include <pthread.h>
#include <unistd.h>
#include <signal.h>
#define __USE_GNU
#include <string.h>

#include "ntl.h"
#include "list.h"
#include "lock.h"
#include "ntl_sched.h"

/*
 * Number of slots in each level of the two-level thread table
 * (struct ntl_l1 / struct ntl_l2) and number of bits in their bitmaps.
 */
#define NTL_SCHED_MAX_ITEMS	 32

/*
 * ntl_real_thread_num: starts at 1, incremented before pthread_create() in
 *                      ntl_sched_add() and decremented when creation fails
 *                      or when ntl_wrapper_pthread() returns.
 * ntl_cpu_num:         value of sysconf(_SC_NPROCESSORS_ONLN), read in
 *                      ntl_sched_init(); ntl_sched_add() only creates a
 *                      pthread while ntl_real_thread_num is below it.
 */
static int ntl_real_thread_num = 1, ntl_cpu_num;

/* The slot count has to fill whole bitmap words. */
#if NTL_SCHED_MAX_ITEMS%BITS_PER_LONG
#error "Wrong value NTL_SCHED_MAX_ITEMS, must be multiple of BITS_PER_LONG"
#endif

/*
 * Second level of the thread table.
 *
 * bitmap: bit N is set by ntl_sched_add() when entry[N] is handed out and
 *         cleared by ntl_sched_remove().
 * entry:  thread stored in slot N; a thread's tid is
 *         (first-level index * NTL_SCHED_MAX_ITEMS + N).
 */
struct ntl_l2
{
	DECLARE_BITMAP(bitmap, NTL_SCHED_MAX_ITEMS);
	struct ntl_thread 	*entry[NTL_SCHED_MAX_ITEMS];
};

/*
 * First level of the thread table.
 *
 * bitmap: only bit 0 is touched in this file (set in ntl_sched_init(),
 *         cleared on its error path).
 * entry:  second-level tables; entry[0] is allocated in ntl_sched_init(),
 *         the rest on demand in ntl_sched_add().
 */
struct ntl_l1
{
	DECLARE_BITMAP(bitmap, NTL_SCHED_MAX_ITEMS);
	struct ntl_l2		*entry[NTL_SCHED_MAX_ITEMS];
};

/* Number of priority slots in a run queue. */
#define NTL_SCHED_RNUM		128

/*
 * One run queue.
 *
 * prio_map:  bit N is set when a thread is linked into task_list[N]
 *            (ntl_sched_enqueue()) and cleared when ntl_sched_dequeue()
 *            finds that list empty.
 * task_list: one list of threads per priority slot, indexed by th->last.
 */
struct ntl_run_queue
{
	DECLARE_BITMAP(prio_map, NTL_SCHED_RNUM);
	struct list_head	task_list[NTL_SCHED_RNUM];
};

/*
 * The two run queues and a pointer to the one __ntl_schedule() currently
 * picks threads from; it flips to the other queue when the current one has
 * no priority bit set.
 */
static struct ntl_run_queue *ntl_rqueue[2], *ntl_current_rqueue;

/*
 * Scheduler's notion of time: advanced by default_schedule_timeout on every
 * ntl_sched_find_current() call; not read from a clock in this file.
 */
static struct timeval ntl_current_time;
/* Settings passed to setitimer(ITIMER_REAL) in ntl_sched_init()/ntl_sched_exit(). */
static struct itimerval ntl_sched_itimer;

/* Taken around thread-table and run-queue updates. */
static struct ntl_lock ntl_sched_lock;
/* Root of the two-level tid -> thread table. */
static struct ntl_l1 *ntl_cache;

/* Maximum number of microseconds each task is allowed to run without rescheduling */
static long default_schedule_timeout = 10000;

/* Thread most recently selected by __ntl_schedule() (or installed by ntl_sched_add()). */
static struct ntl_thread *th_current = NULL;
/*
 * Thread being set up by ntl_sched_add(); it is published here before
 * SIGUSR1 is raised.  NOTE(review): presumably consumed by the SIGUSR1
 * handler defined outside this file - confirm.
 */
struct ntl_thread *th_setup = NULL;

/*
 * Set bit @nr in @mask, a bitmap stored as an array of unsigned long words.
 */
static void ntl_set_bit(unsigned long *mask, int nr)
{
	int index = nr/BITS_PER_LONG;
	int off = nr%BITS_PER_LONG;

	/*
	 * The constant must be as wide as a bitmap word: shifting a plain
	 * int into (or past) its sign bit is undefined behaviour.
	 */
	mask[index] |= 1UL<<off;
}

/*
 * Clear bit @nr in @mask, a bitmap stored as an array of unsigned long words.
 */
static void ntl_clear_bit(unsigned long *mask, int nr)
{
	int index = nr/BITS_PER_LONG;
	int off = nr%BITS_PER_LONG;

	/*
	 * The constant must be as wide as a bitmap word: shifting a plain
	 * int into (or past) its sign bit is undefined behaviour, and an
	 * int-sized complement would also wipe the upper half of a wider word.
	 */
	mask[index] &= ~(1UL<<off);
}

/*
 * Find the lowest zero bit in @mask, which consists of @num words.
 *
 * Returns the bit index counted from the start of the bitmap,
 * or -1 when every bit is set.
 */
static int ntl_search_bit(unsigned long *mask, int num)
{
	int word;

	for (word = 0; word < num; word++) {
		unsigned long free_bits = ~mask[word];
		int pos;

		/* Word is completely occupied. */
		if (!free_bits)
			continue;

		/* ffsl() numbers bits from 1. */
		pos = ffsl(free_bits);
		pos = pos - 1 + word*BITS_PER_LONG;
		return pos;
	}

	return -1;
}

/*
 * Find the lowest set bit in @mask, which consists of @num words.
 *
 * Returns the bit index counted from the start of the bitmap,
 * or -1 when no bit is set.
 */
static int ntl_search_bit_set(unsigned long *mask, int num)
{
	int i;

	for (i=0; i<num; ++i) {
		int bit_pos;

		/*
		 * Only an all-zero word has nothing to offer.  A word with
		 * every bit set must be searched too, otherwise all of its
		 * entries would be invisible to the caller.
		 */
		if (!mask[i])
			continue;

		/* ffsl() numbers bits from 1. */
		bit_pos = ffsl(mask[i]);
		return bit_pos - 1 + i*BITS_PER_LONG;
	}

	return -1;
}

/*
 * Choose a run queue and priority slot for @th and optionally link it in.
 *
 * A thread whose th->last is 0 gets it reset to th->prio and is assigned
 * to the queue that is NOT the current one; any other thread is assigned
 * to the current queue.  th->last is then limited to the last valid slot
 * and th->run_queue_num records the chosen queue, so that
 * ntl_sched_dequeue() later inspects the queue the thread really is on.
 *
 * @add:  non-zero links the thread into the queue and marks the slot in
 *        prio_map; zero only initialises th->task_entry (thread not queued).
 * @head: with @add, non-zero inserts at the list head, zero at the tail.
 *
 * Must be called under scheduler's lock.
 */
static void ntl_sched_enqueue(struct ntl_thread *th, int add, int head)
{
	struct ntl_run_queue *rq;
	int current_num = (ntl_current_rqueue == ntl_rqueue[0]) ? 0 : 1;
	int num;

	if (th->last == 0) {
		th->last = th->prio;
		num = !current_num;
	} else
		num = current_num;

	/*
	 * Clamp only after the reset above, so that a priority beyond the
	 * number of slots can not index past task_list[].
	 */
	if (th->last >= NTL_SCHED_RNUM)
		th->last = NTL_SCHED_RNUM - 1;

	rq = ntl_rqueue[num];
	th->run_queue_num = num;

	if (add) {
		if (head)
			list_add(&th->task_entry, &rq->task_list[th->last]);
		else
			list_add_tail(&th->task_entry, &rq->task_list[th->last]);
		ntl_set_bit(rq->prio_map, th->last);
	} else
		INIT_LIST_HEAD(&th->task_entry);

	ulog("%s: th: %p, rq: %p, prio: %d, last: %d, rqnum: %d, current: %p, rqueue: %p, %p, add: %d.\n",
			__func__, th, rq, th->prio, th->last, th->run_queue_num,
			ntl_current_rqueue, ntl_rqueue[0], ntl_rqueue[1], add);
}

/*
 * Unlink @th from the list it is on (if any) and, when the priority slot
 * th->last of queue th->run_queue_num is left empty, clear that slot's bit
 * in the queue's prio_map.  If @th is the current thread, the current
 * thread pointer is reset as well.
 *
 * Must be called under scheduler's lock.
 */
static void ntl_sched_dequeue(struct ntl_thread *th)
{
	struct ntl_run_queue *rq = ntl_rqueue[th->run_queue_num];
	struct list_head *slot = &rq->task_list[th->last];

	if (th_current == th)
		th_current = NULL;

	if (!list_empty(&th->task_entry))
		list_del_init(&th->task_entry);

	if (list_empty(slot))
		ntl_clear_bit(rq->prio_map, th->last);

	ulog("%s: th: %p, rq: %p, prio: %d, last: %d, rqnum: %d, current: %p, rqueue: %p, %p, empty: %d.\n",
			__func__, th, rq, th->prio, th->last, th->run_queue_num,
			ntl_current_rqueue, ntl_rqueue[0], ntl_rqueue[1], list_empty(slot));
}

/*
 * Run the thread's function through ntl_arch_call_func() with the
 * thread's private data and stack, then hand the thread to
 * ntl_thread_exit().
 *
 * Always returns NULL.
 */
static void *ntl_wrapper_run(struct ntl_thread *th)
{
	ulog("%s: start func: %p, data: %p.\n", __func__, th->func, th->priv);

	ntl_arch_call_func(th->func, th->priv, th->stack, th->stack_size);

	ulog("%s: stop func: %p, data: %p.\n", __func__, th->func, th->priv);

	ntl_thread_exit(th);

	return NULL;
}

/*
 * Start routine of pthreads created by ntl_sched_add().
 *
 * Runs the thread passed in @arg, then keeps calling ntl_schedule() for as
 * long as it returns non-zero, and finally gives back the pthread slot
 * accounted in ntl_real_thread_num.
 *
 * Always returns NULL.
 */
static void *ntl_wrapper_pthread(void *arg)
{
	ntl_wrapper_run((struct ntl_thread *)arg);

	for (;;) {
		if (!ntl_schedule())
			break;
	}

	--ntl_real_thread_num;

	return NULL;
}

/*
 * Pointer to a function taking no arguments and returning nothing.
 * Not referenced by the code visible in this file.
 */
typedef void (*wrapper_func_t)(void);

/*
 * Register @th with the scheduler.
 *
 * Under the scheduler's lock the first free slot of the two-level thread
 * table is claimed (second-level tables are allocated on demand) and its
 * number is stored in th->tid.  The thread is then published in th_setup,
 * SIGUSR1 is sent to the process and the function waits until th->flags
 * becomes non-zero.  NOTE(review): presumably the SIGUSR1 handler installed
 * in ntl_sched_init() sets the flags - confirm.  Afterwards the thread is
 * queued at the head of its run-queue slot, or simply becomes the current
 * thread when there is none.
 *
 * If the thread has a function and fewer pthreads than online CPUs are in
 * use, a detached pthread running ntl_wrapper_pthread() is started for it;
 * failure to start one is not treated as an error.
 *
 * Returns 0 on success, -1 when no slot could be obtained (th->tid is then
 * negative).
 */
int ntl_sched_add(struct ntl_thread *th)
{
	int bit_pos = -1, i, err;

	ntl_lock(&ntl_sched_lock);
	for (i=0; i<NTL_SCHED_MAX_ITEMS; ++i) {
		struct ntl_l2 *l2 = ntl_cache->entry[i];

		if (!l2) {
			l2 = calloc(1, sizeof(*l2));
			if (!l2) {
				bit_pos = -1;
				break;
			}

			l2->entry[0] = th;
			ntl_set_bit(l2->bitmap, 0);

			ntl_cache->entry[i] = l2;

			bit_pos = i*NTL_SCHED_MAX_ITEMS;
			break;
		}
		bit_pos = ntl_search_bit(l2->bitmap, sizeof(l2->bitmap)/sizeof(l2->bitmap[0]));
		if (bit_pos >= 0) {
			ntl_set_bit(l2->bitmap, bit_pos);
			l2->entry[bit_pos] = th;
			bit_pos += i*NTL_SCHED_MAX_ITEMS;
			break;
		}
	}
	th->tid = bit_pos;

	if (bit_pos >= 0) {
		th_setup = th;

		th_setup->flags = 0;
		kill(getpid(), SIGUSR1);
		while (th_setup->flags == 0) {
			ulog("%s: waiting: th_current: %p, setup: %p flags: %lx.\n", 
					__func__, th_current, th_setup, th_setup->flags);
			sleep(1);
		}
		if (th_current)
			ntl_sched_enqueue(th, 1, 1);
		else {
			ntl_sched_enqueue(th, 0, 0);
			th_current = th;
		}
	}

	ntl_unlock(&ntl_sched_lock);

	if (bit_pos < 0)
		return -1;

	if (th->func && ntl_real_thread_num < ntl_cpu_num) {
		pthread_t ptid;

		ntl_real_thread_num++;
		err = pthread_create(&ptid, NULL, ntl_wrapper_pthread, th);
		if (err)
			ntl_real_thread_num--;
		else
			/*
			 * The handle is dropped right here, so nobody can
			 * ever join this thread: detach it to let its
			 * resources be released when it finishes.
			 */
			pthread_detach(ptid);
	}

	return 0;
}

/*
 * Unregister @th from the scheduler: release its slot in the thread table
 * and take it off its run queue.
 *
 * A thread whose tid is outside the table (for example one for which
 * ntl_sched_add() failed and left a negative tid) is ignored, as is a
 * thread whose second-level table does not exist.
 */
void ntl_sched_remove(struct ntl_thread *th)
{
	int l1_index, off;
	struct ntl_l2 *l2;

	if (th->tid < 0 || th->tid >= NTL_SCHED_MAX_ITEMS*NTL_SCHED_MAX_ITEMS)
		return;

	l1_index = th->tid/NTL_SCHED_MAX_ITEMS;
	off = th->tid%NTL_SCHED_MAX_ITEMS;

	/* Look the table up under the lock that protects its updates. */
	ntl_lock(&ntl_sched_lock);
	l2 = ntl_cache->entry[l1_index];
	if (l2) {
		/*
		 * Release the slot only if it still belongs to this thread,
		 * and drop the pointer together with the bit so that a
		 * lookup by tid can not return a thread that was removed.
		 */
		if (l2->entry[off] == th) {
			ntl_clear_bit(l2->bitmap, off);
			l2->entry[off] = NULL;
		}
		ntl_sched_dequeue(th);
	}
	ntl_unlock(&ntl_sched_lock);
}

/*
 * Look a thread up by its id.
 *
 * Returns the pointer stored in the thread table for @tid, or NULL when
 * @tid is outside the table or its second-level table was never allocated.
 * The scheduler's lock is not taken here.
 */
struct ntl_thread *ntl_thread_get(int tid)
{
	struct ntl_l2 *l2;

	/* Negative or oversized ids would index outside both table levels. */
	if (tid < 0 || tid >= NTL_SCHED_MAX_ITEMS*NTL_SCHED_MAX_ITEMS)
		return NULL;

	l2 = ntl_cache->entry[tid/NTL_SCHED_MAX_ITEMS];
	if (!l2)
		return NULL;

	return l2->entry[tid%NTL_SCHED_MAX_ITEMS];
}

/*
 * Recompute th->last from the time @th has been running.
 *
 * The elapsed time is the distance between the scheduler's current time
 * and th->start in microseconds, limited to default_schedule_timeout.
 * th->last becomes th->prio scaled by the unused fraction of the timeout,
 * so a thread that used the whole timeout ends up with 0.
 */
static void ntl_sched_recalc_time(struct ntl_thread *th)
{
	long elapsed, unused;

	elapsed = (ntl_current_time.tv_sec - th->start.tv_sec)*1000000 + (ntl_current_time.tv_usec - th->start.tv_usec);

	if (elapsed > default_schedule_timeout) {
		ulog("%s: task has wrong timeslice th: %p, prio: %d, diff: %ld, max: %ld.\n",
				__func__, th, th->prio, elapsed, default_schedule_timeout);
		elapsed = default_schedule_timeout;
	}

	unused = default_schedule_timeout - elapsed;
	th->last = th->prio*unused/default_schedule_timeout;

	ulog("%s: th: %p, prio: %d, last: %d, diff: %ld, default: %ld.\n",
			__func__, th, th->prio, th->last, elapsed, default_schedule_timeout);
}

/*
 * Recompute the priority slot of the current thread from the time it has
 * run and put it back at the tail of its run-queue slot.
 *
 * Does nothing when there is no current thread.
 */
void ntl_recalc_current(void)
{
	struct ntl_thread *cur = th_current;

	/* ntl_sched_dequeue() may have reset the current thread to NULL. */
	if (!cur)
		return;

	ntl_sched_recalc_time(cur);
	ntl_sched_enqueue(cur, 1, 0);
}

/*
 * Debugging aid: log the priority maps and the queued threads of both
 * run queues.
 */
static void ntl_sched_dump_queues(void)
{
	struct ntl_thread *th;
	int i, j;

	for (i=0; i<2; ++i) {
		struct ntl_run_queue *rq = ntl_rqueue[i];

		ulog("rq: %p: ", rq);
		for (j=0; j<NTL_SCHED_RNUM/BITS_PER_LONG; ++j)
			ulog("%016lx ", rq->prio_map[j]);
		for (j=0; j<NTL_SCHED_RNUM; ++j)
			list_for_each_entry(th, &rq->task_list[j], task_entry) {
				ulog("[%d %p %d/%d] ", j, th, th->last, th->prio);
			}
		ulog("\n");
	}
}

/*
 * Pick the next thread to run.
 *
 * The current thread (if any) is first put back on a run queue.  Then the
 * lowest set bit of the current queue's prio_map selects a slot and the
 * thread at the tail of that slot's list is dequeued, stamped with the
 * scheduler's current time and made the current thread.  When the current
 * queue has no set bit, the other queue becomes current and is searched
 * once.  A set bit whose list turns out to be empty is cleared and the
 * search is repeated.
 *
 * Returns the new current thread; this is the unchanged th_current when
 * both queues are empty.
 *
 * Must be called under scheduler's lock.
 */
static struct ntl_thread *__ntl_schedule(void)
{
	struct ntl_thread *th = NULL, *old;
	int idx, switched = 0;

	ulog("%s, current: %p\n", __func__, th_current);

	if (th_current)
		ntl_recalc_current();

	old = th_current;

	/* Flip to 1 to trace the queue contents on every scheduling pass. */
	if (0)
		ntl_sched_dump_queues();

	for (;;) {
		idx = ntl_search_bit_set(ntl_current_rqueue->prio_map, NTL_SCHED_RNUM/BITS_PER_LONG);
		if (idx < 0) {
			struct ntl_run_queue *prev = ntl_current_rqueue;

			/* Both queues have been looked at: nothing to run. */
			if (switched)
				break;

			if (ntl_current_rqueue == ntl_rqueue[0])
				ntl_current_rqueue = ntl_rqueue[1];
			else
				ntl_current_rqueue = ntl_rqueue[0];
			ulog("%s: switched from %p to %p.\n", __func__, prev, ntl_current_rqueue);
			switched = 1;
			continue;
		}

		if (list_empty(&ntl_current_rqueue->task_list[idx])) {
			ulog("%s: queue is empty, but map is not: prio: %d, rq: %p.\n",
				__func__, idx, ntl_current_rqueue);
			ntl_clear_bit(ntl_current_rqueue->prio_map, idx);
			/*
			 * Search again instead of returning the old current
			 * thread: it has just been linked into a run queue
			 * above, and letting it run while still queued would
			 * get it linked a second time on the next pass and
			 * corrupt the list.  Every retry clears one bit, so
			 * the loop terminates.
			 */
			continue;
		}

		th = list_entry(ntl_current_rqueue->task_list[idx].prev, struct ntl_thread, task_entry);

		ulog("New current: %p, prio: %d, last: %d, old: %p, func: %p.\n", 
				th, th->prio, th->last, th_current, th->func);

		ntl_sched_dequeue(th);
		th->start = ntl_current_time;
		th_current = th;
		break;
	}
	ulog("%s: %p -> %p.\n", __func__, old, th_current);

	return th_current;
}

/*
 * Advance the scheduler's clock by one default timeout and select the next
 * thread to run, all under the scheduler's lock.
 *
 * @oldp: receives the thread that was current on entry.
 * @newp: receives the thread returned by __ntl_schedule().
 */
void ntl_sched_find_current(struct ntl_thread **oldp, struct ntl_thread **newp)
{
	struct timeval *now = &ntl_current_time;

	__ntl_lock(&ntl_sched_lock);

	*oldp = th_current;

	/* One tick forward, carrying whole seconds out of the microseconds. */
	now->tv_usec += default_schedule_timeout;
	if (now->tv_usec >= 1000000) {
		now->tv_usec -= 1000000;
		now->tv_sec++;
	}

	*newp = __ntl_schedule();

	__ntl_unlock(&ntl_sched_lock);
}

/*
 * Request a scheduling pass by sending SIGALRM to the own process.
 *
 * Always returns 0.
 */
int ntl_schedule(void)
{
	pid_t self = getpid();

	kill(self, SIGALRM);

	return 0;
}

/*
 * Initialise the scheduler: read the number of online CPUs, set up the
 * lock, allocate both run queues and the thread table with its first
 * second-level table, install the SIGALRM and SIGUSR1 handlers and start
 * the periodic ITIMER_REAL timer with default_schedule_timeout.
 *
 * Returns 0 on success or a negative errno value.  On failure everything
 * allocated here is freed, the pointers are reset and the previous signal
 * dispositions are restored.
 */
int ntl_sched_init(void)
{
	void (*old_alrm)(int);
	void (*old_usr1)(int);
	int err = -ENOMEM, i, j;
	
	ntl_cpu_num = sysconf(_SC_NPROCESSORS_ONLN);

	ntl_lock_init(&ntl_sched_lock);

	/* Lets the error path free both slots no matter where it is entered. */
	ntl_rqueue[0] = ntl_rqueue[1] = NULL;

	for (i=0; i<2; ++i) {
		/* One run queue already holds all NTL_SCHED_RNUM lists. */
		ntl_rqueue[i] = calloc(1, sizeof(*ntl_rqueue[i]));
		if (!ntl_rqueue[i])
			goto err_out_free_rqueue;

		for (j=0; j<NTL_SCHED_RNUM; ++j)
			INIT_LIST_HEAD(&ntl_rqueue[i]->task_list[j]);
	}

	ntl_current_rqueue = ntl_rqueue[0];

	ntl_cache = calloc(1, sizeof(*ntl_cache));
	if (!ntl_cache)
		goto err_out_free_rqueue;

	ntl_cache->entry[0] = calloc(1, sizeof(*ntl_cache->entry[0]));
	if (!ntl_cache->entry[0])
		goto err_out_free_cache;

	ntl_set_bit(ntl_cache->bitmap, 0);

	old_alrm = signal(SIGALRM, ntl_arch_sched_sighandler);
	if (old_alrm == SIG_ERR) {
		err = -errno;
		goto err_out_free_entry;
	}

	old_usr1 = signal(SIGUSR1, ntl_arch_sched_sigsetup);
	if (old_usr1 == SIG_ERR) {
		err = -errno;
		goto err_out_restore_alrm;
	}

	ntl_sched_itimer.it_interval.tv_sec = 0;
	ntl_sched_itimer.it_interval.tv_usec = default_schedule_timeout;
	ntl_sched_itimer.it_value.tv_sec = 0;
	ntl_sched_itimer.it_value.tv_usec = default_schedule_timeout;

	if (setitimer(ITIMER_REAL, &ntl_sched_itimer, NULL)) {
		/* setitimer() only reports -1; the reason is in errno. */
		err = -errno;
		goto err_out_restore_usr1;
	}

	return 0;

err_out_restore_usr1:
	signal(SIGUSR1, old_usr1);
err_out_restore_alrm:
	signal(SIGALRM, old_alrm);
err_out_free_entry:
	ntl_clear_bit(ntl_cache->bitmap, 0);
	free(ntl_cache->entry[0]);
err_out_free_cache:
	free(ntl_cache);
	ntl_cache = NULL;
err_out_free_rqueue:
	for (i=0; i<2; ++i) {
		free(ntl_rqueue[i]);
		ntl_rqueue[i] = NULL;
	}
	ntl_current_rqueue = NULL;
	return err;
}

/*
 * Stop the scheduler's periodic timer by installing an all-zero
 * ITIMER_REAL setting.  Nothing allocated by ntl_sched_init() is freed
 * here.
 */
void ntl_sched_exit(void)
{
	static const struct timeval disarmed = { 0, 0 };

	ulog("%s\n", __func__);

	ntl_sched_itimer.it_interval = disarmed;
	ntl_sched_itimer.it_value = disarmed;

	setitimer(ITIMER_REAL, &ntl_sched_itimer, NULL);
}
