/*****************************************************************************/
/* The development of this program is partly supported by IPA                */
/* (Information-Technology Promotion Agency, Japan).                         */
/*****************************************************************************/

/*****************************************************************************/
/*  bt_main.h - branch trace module header                                   */
/*  Copyright: Copyright (c) Hitachi, Ltd. 2005-2006                         */
/*             Authors: Yumiko Sugita (sugita@sdl.hitachi.co.jp),            */
/*                      Satoshi Fujiwara (sa-fuji@sdl.hitachi.co.jp)         */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either version 2 of the License, or        */
/*  (at your option) any later version.                                      */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111 USA      */
/*****************************************************************************/

#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
#include "bt.h"

/* MSR_DEBUGCTL bit layout in use; chosen in check_bts_availability()
 * from one of the two candidate layouts below.
 */
struct debugctl_bits ctl_bits;
struct debugctl_bits ctl_bits_pm = MSR_DEBUGCTL_PM_BITS;
struct debugctl_bits ctl_bits_p4 = MSR_DEBUGCTL_P4_BITS;
/* Masks OR-ed into / cleared from the low word of MSR_DEBUGCTL by the
 * bts_facility_*() helpers; computed in check_bts_availability().
 */
unsigned int ctl_on_bits, ctl_off_bits;

/* CPU mask: bits are set in bt_enable_per_cpu() and cleared in
 * bt_disable_per_cpu().
 */
cpumask_t enable;
/* Bitmap indexed by pid, consulted through is_trace_pid(). It is not
 * allocated in this file.
 */
unsigned long *pid_tbl;
/* Not referenced in this file. */
unsigned long *syscall_pid_tbl;
/* Exclusive upper bound for pids accepted by the pid bitmap helpers.
 * NOTE(review): there is no initializer, so this is 0 unless the module
 * parameter is given, and is_trace_pid() then rejects every pid.
 */
int pid_max;
////spinlock_t pid_tbl_lock = SPIN_LOCK_UNLOCKED;
/* System call bitmaps; is_trace_syscall() tests syscall_filter_tbl. */
DECLARE_BITMAP(syscall_exe_tbl, NR_syscalls);
DECLARE_BITMAP(syscall_filter_tbl, NR_syscalls);

/* Values supplied as module parameters; bt_mod_init() refuses to load
 * when any of them is zero. They are not otherwise used in this file.
 */
unsigned long irq_desc_p;
unsigned long no_irq_type_p;
unsigned long switch_to_addr;
int switch_to_size;

/* Compared against the CHK_SCTIME_* constants. */
int chk_sctime;

/* Buffer geometry (module parameters, validated in bt_mod_init()). */
size_t btsbuf_size = BTS_BUF_MIN_SIZE;
size_t bts_int_margin_recs = DEFAULT_INT_MARGIN_RECS;
size_t subbuf_size = 2 * 1024 * 1024;
size_t subbuf_num = 16;
size_t subbuf_sleep_threshold = 8;
/* Trace mode (module parameter); tested through the is_*() predicates. */
int mode;

/* Per-CPU trace state: DS area pointer, pid bookkeeping, on/off counter. */
DEFINE_PER_CPU(struct info_per_cpu, bt_info_per_cpu);

/*
 * Forward declarations. Apart from bt_mod_cleanup(), none of these
 * functions is defined in this file.
 */

/* probe registration */
int reg_probe(void);
void unreg_probe(void);

/* procfs interface */
int proc_init(void);
void proc_cleanup(void);

/* defined further down in this file */
static void bt_mod_cleanup(void);

/* trace record output and relayfs channel handling */
void write_bt_records(void *buf, size_t size);
void write_pid_record(pid_t pid, char *comm);
void write_warn_record(unsigned long left);
void write_tmr_record(unsigned long long tmr, long n_syscall);
int relfs_init(void);
void relfs_cleanup(void);
void relfs_flush(void);

/* interrupt service routine setup / teardown */
int setup_isr(void);
void cleanup_isr(void);

#ifndef DEBUG
/*
 * When DEBUG is not defined, these stubs are compiled so that calls to
 * serial_init() and serial_prints() do nothing and return 0.
 */
int serial_init(int ttyS)
{
	return 0;
}

asmlinkage int serial_prints(const char *fmt, ...)
{
	return 0;
}
#endif

/*
 * bts_warn_index(m): BTS index at or above which bts_log_write() emits a
 * warning record. The active definition is the end of the buffer
 * (bts_max). The disabled variant below would warn earlier, at one quarter
 * of the bts_threshold..bts_max span before the end.
 */
/*
#define bts_warn_index(m)	\
	((m)->bts_max - (((m)->bts_max - (m)->bts_threshold) / 4))
	*/
#define bts_warn_index(m)	((m)->bts_max)

/*
 * Flush the current CPU's pending trace state to the log.
 *
 * In order, this writes:
 *   - the pid record, if it has not been written yet for this pid;
 *   - the syscall start/end timer records, when syscall timing is on
 *     (chk_sctime != CHK_SCTIME_OFF) and is_syscall(mode) holds;
 *   - the BTS records between bts_base and bts_index;
 *   - a warning record when bts_index has reached bts_warn_index().
 * Finally bts_index is rewound to bts_base.
 *
 * Unless chk_sctime is CHK_SCTIME_ON_WITHOUT_TRACE, nothing is written
 * when the CPU has no DS area or no BTS record is pending.
 */
void bts_log_write(void)
{
	unsigned long flags;
	int cpu;
	struct info_per_cpu *info;
	struct ds_manage *ds_mng;
	struct pid_manage *pid_mng;

	/* Interrupts are disabled for the whole flush so that the log can
	 * be retrieved from a dump file: if an interrupt occurred during
	 * bts_log_write(), we would lose track of how many records had
	 * been written.
	 */
	local_irq_save(flags);
	cpu = smp_processor_id();
	info = &per_cpu(bt_info_per_cpu, cpu);
	ds_mng = info->ds_manage;
	pid_mng = &info->pid_manage;

	if (chk_sctime != CHK_SCTIME_ON_WITHOUT_TRACE
	    && (!ds_mng || ds_mng->bts_index <= ds_mng->bts_base))
		goto EXIT;

	if (!pid_mng->is_wrote) {
		write_pid_record(pid_mng->pid, pid_mng->comm);
		pid_mng->is_wrote = 1;
	}
	if (chk_sctime != CHK_SCTIME_OFF && is_syscall(mode)) {
		if (pid_mng->syscall_start) {
			write_tmr_record(pid_mng->syscall_start,
					 pid_mng->n_syscall);
			pid_mng->syscall_start = 0;
		}
		if (pid_mng->syscall_end) {
			/* -1 as syscall number marks the end record */
			write_tmr_record(pid_mng->syscall_end, -1);
			pid_mng->syscall_end = 0;
		}
	}

	/* In CHK_SCTIME_ON_WITHOUT_TRACE mode the early exit above is
	 * skipped, so a CPU without a DS area can get here. There are no
	 * BTS records to flush in that case, and ds_mng must not be
	 * dereferenced.
	 */
	if (!ds_mng)
		goto EXIT;

	write_bt_records((void*)ds_mng->bts_base,
			 ds_mng->bts_index - ds_mng->bts_base);
	if (ds_mng->bts_index >= bts_warn_index(ds_mng)) {
		unsigned long left = ds_mng->bts_max - ds_mng->bts_index;

		serial_prints("!!! bts left only: %ld\n", left);
		write_warn_record(left);
	}
	/* rewind: everything up to bts_index has been written out */
	ds_mng->bts_index = ds_mng->bts_base;
EXIT:
	local_irq_restore(flags);
}

/*
 * Test whether @pid is marked in the bitmap @pid_tbl.
 *
 * Returns 0 for pids outside 1 .. pid_max-1, otherwise the result of
 * test_bit(). No lock is taken (the pid_tbl_lock calls are disabled).
 */
int is_trace_pid(pid_t pid, unsigned long *pid_tbl)
{
	/* out-of-range pids have no slot in the bitmap */
	if (pid <= 0 || pid >= pid_max)
		return 0;

	return test_bit(pid, pid_tbl);
}

/*
 * Mark @pid in the bitmap @pid_tbl. Pids outside 1 .. pid_max-1 are
 * silently ignored. No lock is taken (the pid_tbl_lock calls are
 * disabled).
 */
void add_pid_tbl(pid_t pid, unsigned long *pid_tbl)
{
	if (pid <= 0 || pid >= pid_max)
		return;

	set_bit(pid, pid_tbl);
}

/*
 * Clear @pid in the bitmap @pid_tbl. Pids outside 1 .. pid_max-1 are
 * silently ignored. No lock is taken (the pid_tbl_lock calls are
 * disabled).
 */
void remove_pid_tbl(pid_t pid, unsigned long *pid_tbl)
{
	if (pid <= 0 || pid >= pid_max)
		return;

	clear_bit(pid, pid_tbl);
}

/*
 * Test whether system call number @n_syscall is set in
 * syscall_filter_tbl.
 *
 * Returns 0 for numbers outside 0 .. NR_syscalls-1, otherwise the result
 * of test_bit().
 */
int is_trace_syscall(long n_syscall)
{
	if (n_syscall >= 0 && n_syscall < NR_syscalls)
		return test_bit(n_syscall, syscall_filter_tbl);

	return 0;
}

/*
 * Set system call number @n_syscall in the bitmap @syscall_tbl.
 * Numbers outside 0 .. NR_syscalls-1 are silently ignored.
 */
void add_syscall_tbl(long n_syscall, unsigned long *syscall_tbl)
{
	if (n_syscall < 0 || n_syscall >= NR_syscalls)
		return;

	set_bit(n_syscall, syscall_tbl);
}

void bts_facility_on(void)
{
	int cpu = smp_processor_id();
	unsigned int low, high;
	struct info_per_cpu *info;

	info = &per_cpu(bt_info_per_cpu, cpu);
	if (!info->ds_manage)
		return;

	//serial_prints("on(%d)\n", cpu);
	rdmsr(MSR_DEBUGCTL, low, high);
	low |= ctl_on_bits;
	if (chk_sctime != CHK_SCTIME_ON_WITHOUT_TRACE)
		wrmsr(MSR_DEBUGCTL, low, high);
}

void bts_facility_off(void)
{
	int cpu = smp_processor_id();
	unsigned int low, high;
	struct info_per_cpu *info;

	info = &per_cpu(bt_info_per_cpu, cpu);
	if (!info->ds_manage)
		return;

	rdmsr(MSR_DEBUGCTL, low, high);
	low &= ~ctl_off_bits;
	wrmsr(MSR_DEBUGCTL, low, high);
	//serial_prints("off(%d)\n", cpu);
}

/*
 * Record whether branch tracing is on for the executing CPU and, if it
 * is, switch it off.
 *
 * @is_on: out-parameter. Receives the TR bit of MSR_DEBUGCTL (non-zero
 *         when tracing was on), suitable for bts_facility_restore().
 *         It is set to 0 when the CPU has no DS area, so the caller
 *         never reads an indeterminate value.
 */
void bts_facility_save_and_off(int *is_on)
{
	int cpu = smp_processor_id();
	unsigned int low, high;
	struct info_per_cpu *info;

	/* define the result on every path, including the early return */
	*is_on = 0;

	info = &per_cpu(bt_info_per_cpu, cpu);
	if (!info->ds_manage)
		return;

	rdmsr(MSR_DEBUGCTL, low, high);
	*is_on = low & ctl_bits.tr;
	if (*is_on) {
		/* only touch the MSR when tracing is actually running */
		low &= ~ctl_off_bits;
		wrmsr(MSR_DEBUGCTL, low, high);
	}
}

void bts_facility_restore(int is_on)
{
	int cpu = smp_processor_id();
	unsigned int low, high;
	struct info_per_cpu *info;

	info = &per_cpu(bt_info_per_cpu, cpu);
	if (!info->ds_manage)
		return;

	rdmsr(MSR_DEBUGCTL, low, high);
	if ((low ^ is_on) & ctl_bits.tr) {
		if (is_on) {
			//serial_prints("on(%d)\n", cpu);
			low |= ctl_on_bits;
		} else
			low &= ~ctl_off_bits;
		wrmsr(MSR_DEBUGCTL, low, high);
		/*
		if (!is_on)
			serial_prints("off(%d)\n", cpu);
			*/
	}
}

/*
 * Record task @t as the task being traced on CPU @cpu, then switch
 * tracing on.
 *
 * The pid and BT_COMM_LEN bytes of the command name are copied into the
 * per-CPU pid_manage, and is_wrote is cleared so that the next
 * bts_log_write() emits a fresh pid record.
 *
 * Note that bts_facility_on() acts on the executing CPU, not on @cpu.
 */
void bts_on_and_set_pid_info(int cpu, struct task_struct *t)
{
	struct info_per_cpu *info = &per_cpu(bt_info_per_cpu, cpu);
	struct pid_manage *pm = &info->pid_manage;

	pm->is_wrote = 0;
	pm->pid = t->pid;
	memcpy(pm->comm, t->comm, BT_COMM_LEN);

	bts_facility_on();
}

/*
 * Enable tracing on the executing CPU. bt_enable() runs this on every
 * CPU through on_each_cpu().
 *
 * @data: unused.
 *
 * Does nothing when the CPU has no DS area. Otherwise:
 *   - in is_kern_pid_by_hook() mode the CPU must already be in the
 *     'enable' mask, and the current pid is added to pid_tbl;
 *   - in every other mode the CPU is added to the 'enable' mask.
 * Tracing is then switched on immediately only when the mode is not
 * is_upid() and either is_kern_all_by_hook() holds or the current pid is
 * traced while the mode is not is_syscall_pid().
 */
void bt_enable_per_cpu(void *data)
{
	int cpu;
	pid_t pid;
	struct info_per_cpu *info;

	cpu = smp_processor_id();
	info = &per_cpu(bt_info_per_cpu, cpu);
	if (!info->ds_manage)
		return;

	pid = current->pid;
	if (is_kern_pid_by_hook(mode)) {
		if (!cpu_isset(cpu, enable))
			return;
		add_pid_tbl(pid, pid_tbl);
	} else
		cpu_set(cpu, enable);

	/* Make sure the kernel-space branches executed immediately after
	 * the write to /proc/btrax/enable are not traced.
	 */
	if (is_upid(mode))
		return;

	if (is_kern_all_by_hook(mode) ||
	    (!is_syscall_pid(mode) && is_trace_pid(pid, pid_tbl))) {
		bts_on_and_set_pid_info(cpu, current);
		info->on_off_cnt++;
	}
}

/*
 * Disable tracing on the executing CPU.
 *
 * @from_procfs: non-NULL forces the trace to be stopped and flushed
 *               (bt_disable() passes (void*)1). With NULL, the stop only
 *               takes effect when is_kern_all_by_hook(mode) holds or the
 *               current pid is in pid_tbl; otherwise the previous on/off
 *               state is restored.
 *               NOTE(review): NULL is presumably what the on/off hooks
 *               pass; confirm against the callers.
 *
 * Does nothing when the CPU has no DS area or is not in the 'enable'
 * mask. The CPU is removed from 'enable' at the end, unless the mode is
 * is_both() and @from_procfs is NULL.
 */
void bt_disable_per_cpu(void *from_procfs)
{
	int cpu = smp_processor_id();
	int was_on;
	pid_t pid;
	struct info_per_cpu *info = &per_cpu(bt_info_per_cpu, cpu);

	if (!info->ds_manage)
		return;

	pid = current->pid;
	if (!cpu_isset(cpu, enable))
		return;

	/* stop tracing first, then decide whether the stop is final */
	bts_facility_save_and_off(&was_on);
	if (!from_procfs && !is_kern_all_by_hook(mode) &&
	    !is_trace_pid(pid, pid_tbl)) {
		/* not ours to stop: put the facility back as it was */
		bts_facility_restore(was_on);
	} else {
		info->on_off_cnt++;
		bts_log_write();
		if (is_kern_pid_by_hook(mode))
			remove_pid_tbl(pid, pid_tbl);
	}

	if (!is_both(mode) || from_procfs)
		cpu_clear(cpu, enable);
}

/*
 * Run bt_enable_per_cpu() on every CPU.
 *
 * NOTE(review): the last on_each_cpu() argument is 0 here but 1 in
 * bt_disable(); confirm the difference is intended.
 */
void bt_enable(void)
{
	void *cookie = (void*)1;	/* bt_enable_per_cpu() ignores it */

	on_each_cpu(bt_enable_per_cpu, cookie, 1, 0);
}

/*
 * Run bt_disable_per_cpu() on every CPU with a non-NULL argument, which
 * forces the stop, then flush the relayfs channel.
 */
void bt_disable(void)
{
	void *from_procfs = (void*)1;

	on_each_cpu(bt_disable_per_cpu, from_procfs, 1, 1);
	relfs_flush();
}

/* Enable/disable entry points, exported (GPL-only) for other modules. */
EXPORT_SYMBOL_GPL(bt_enable_per_cpu);
EXPORT_SYMBOL_GPL(bt_disable_per_cpu);
EXPORT_SYMBOL_GPL(bt_enable);
EXPORT_SYMBOL_GPL(bt_disable);

/* Exported so that trace data can be retrieved from a crash dump.
 * bt_channel and bt_dropped are defined outside this file.
 */
extern struct rchan *bt_channel;
extern size_t bt_dropped;
EXPORT_PER_CPU_SYMBOL_GPL(bt_info_per_cpu);
EXPORT_SYMBOL_GPL(bt_channel);
EXPORT_SYMBOL_GPL(bt_dropped);

/*
 * Check that the processor offers the BTS facility and that it is free,
 * and select the MSR_DEBUGCTL bit layout to use.
 *
 * Side effects: sets ctl_bits, ctl_on_bits and ctl_off_bits (only once
 * the CPUID feature check has passed).
 *
 * Returns 0 on success, -ENOSYS when CPUID leaf 1 does not report the
 * feature (EDX bit 21), or -EBUSY when the BTS bit is already set in
 * MSR_DEBUGCTL.
 */
static int check_bts_availability(void)
{
	unsigned int eax, ebx, ecx, edx;
	/* Wide enough for base family 0xf plus the 8-bit extended family;
	 * an unsigned char would wrap and could alias a small family
	 * number such as 6.
	 */
	unsigned int family, model;

	cpuid(0x01, &eax, &ebx, &ecx, &edx);
	if (!(edx & (1 << 21))) {
		printk("%s: BTS facility is not available in this processor.\n",
		       MOD_NAME);
		return -ENOSYS;
	}
	family = (eax >> 8) & 0xf;
	model = (eax >> 4) & 0xf;
	/* the extended family/model fields are folded in for family 0xf only */
	if (family == 0xf) {
		family += (eax >> 20) & 0xff;
		model += ((eax >> 16) & 0xf) << 4;
	}
	if (family == 6 && (model == 9 || model == 13)) /* cpu is Pentium-M? */
		ctl_bits = ctl_bits_pm;
	else
		ctl_bits = ctl_bits_p4;
	ctl_on_bits  = ctl_bits.bts|ctl_bits.tr|ctl_bits.btint;
	ctl_off_bits = ctl_bits.bts|ctl_bits.tr|ctl_bits.btint|ctl_bits.lbr;

	/* eax/edx are reused for the low/high words of MSR_DEBUGCTL */
	rdmsr(MSR_DEBUGCTL, eax, edx);
	if (eax & ctl_bits.bts) {
		printk("%s: BTS facility is already used.\n", MOD_NAME);
		return -EBUSY;
	}
	return 0;
}

/*
 * Point MSR_IA32_DS_AREA of the executing CPU at that CPU's ds_manage
 * structure. Called on every CPU through on_each_cpu().
 *
 * @data: unused.
 *
 * The high word of the MSR is read and written back unchanged. Does
 * nothing when the CPU has no DS area.
 *
 * NOTE(review): the pointer is passed as the low 32 bits only, which
 * assumes a 32-bit kernel; confirm before building for 64-bit.
 */
static void setup_ds_area_per_cpu(void *data)
{
	int this_cpu = smp_processor_id();
	struct info_per_cpu *info = &per_cpu(bt_info_per_cpu, this_cpu);
	struct ds_manage *area = info->ds_manage;
	unsigned int lo, hi;

	if (area == NULL)
		return;

	rdmsr(MSR_IA32_DS_AREA, lo, hi);
	wrmsr(MSR_IA32_DS_AREA, area, hi);
}

/*
 * Allocate and initialise one DS management area plus BTS buffer for
 * every online CPU, then load its address into each CPU's
 * MSR_IA32_DS_AREA.
 *
 * btsbuf_size is first rounded down to a whole number of bt_record
 * entries. Each allocation holds a struct ds_manage immediately followed
 * by the BTS buffer.
 *
 * Returns 0 on success, -EINVAL when bts_int_margin_recs does not fit in
 * the buffer, or -ENOMEM when an allocation fails. On -ENOMEM, areas
 * already allocated stay recorded in the per-CPU info for
 * cleanup_ds_area() to free.
 */
static int setup_ds_area(void)
{
	int i;
	struct ds_manage *ds_mng;
	struct info_per_cpu *info;

	btsbuf_size -= btsbuf_size % sizeof(struct bt_record);

	/* The interrupt threshold is placed bts_int_margin_recs records
	 * before the end of the buffer. A margin larger than the buffer
	 * would make the unsigned subtraction below land before bts_base.
	 */
	if (bts_int_margin_recs > btsbuf_size / sizeof(struct bt_record)) {
		printk("%s: bts_int_margin_recs exceeds BTS buffer size.\n",
		       MOD_NAME);
		return -EINVAL;
	}

	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_isset(i, cpu_online_map))
			continue;

		ds_mng = vmalloc(sizeof(struct ds_manage) + btsbuf_size);
		info = &per_cpu(bt_info_per_cpu, i);
		info->ds_manage = ds_mng;
		if (!ds_mng) {
			printk("%s: BTS buffer cannot allocate.\n", MOD_NAME);
			return -ENOMEM;
		}
		/* the BTS buffer starts right behind the management header */
		ds_mng->bts_base = (unsigned long)&ds_mng[1];
		ds_mng->bts_index = ds_mng->bts_base;
		ds_mng->bts_max = ds_mng->bts_base + btsbuf_size;
		ds_mng->bts_threshold = ds_mng->bts_max
			- bts_int_margin_recs * sizeof(struct bt_record);
	}
	on_each_cpu(setup_ds_area_per_cpu, NULL, 1, 1);
	return 0;
}

/*
 * Free every per-CPU DS management area that is still allocated and
 * clear the per-CPU pointer. All NR_CPUS slots are visited.
 */
static void cleanup_ds_area(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		struct info_per_cpu *info = &per_cpu(bt_info_per_cpu, cpu);

		if (!info->ds_manage)
			continue;
		vfree(info->ds_manage);
		info->ds_manage = NULL;
	}
}

/*
 * Module entry point.
 *
 * Validates the module parameters, then brings the subsystems up in
 * this order: serial console, probes, BTS availability check, DS areas,
 * procfs, relayfs, ISR. In is_kern_pid_by_hook() mode the 'enable' mask
 * is preset to all online CPUs.
 *
 * Returns the result of the last step on success, -EINVAL for bad
 * parameters, or the negative code of the step that failed. On any
 * failure the complete bt_mod_cleanup() sequence runs, including the
 * teardown of stages that were never started.
 */
static int bt_mod_init(void)
{
	int rc;

	/* check module parameters */
	rc = -EINVAL;
	if (!irq_desc_p || !no_irq_type_p || !switch_to_addr || !switch_to_size)
		goto fail;
	if (!subbuf_num || subbuf_size < BTS_BUF_MIN_SIZE
	    || !subbuf_sleep_threshold)
		goto fail;
	if (btsbuf_size < BTS_BUF_MIN_SIZE
	    || bts_int_margin_recs < MIN_INT_MARGIN_RECS)
		goto fail;

	serial_init(1);

	rc = reg_probe();
	if (rc < 0)
		goto fail;
	rc = check_bts_availability();
	if (rc < 0)
		goto fail;
	rc = setup_ds_area();
	if (rc < 0)
		goto fail;
	rc = proc_init();
	if (rc < 0)
		goto fail;
	rc = relfs_init();
	if (rc < 0)
		goto fail;
	rc = setup_isr();
	if (rc < 0)
		goto fail;

	if (is_kern_pid_by_hook(mode))
		enable = cpu_online_map;
	printk("bt_mod loaded(%d)\n", mode);
	return rc;

fail:
	bt_mod_cleanup();
	return rc;
}

/*
 * Module exit point, also used as the error path of bt_mod_init().
 *
 * Stops tracing on all CPUs, then tears the subsystems down in the
 * reverse of their start-up order: ISR, relayfs, procfs, DS areas,
 * probes.
 */
static void bt_mod_cleanup(void)
{
	/* stop tracing and flush before anything is torn down */
	bt_disable();

	cleanup_isr();
	relfs_cleanup();
	proc_cleanup();
	cleanup_ds_area();
	unreg_probe();
}

module_init(bt_mod_init);
module_exit(bt_mod_cleanup);

/*
 * Load-time parameters. The third module_param() argument is the sysfs
 * permission mask; 0 means the parameter is not shown in sysfs.
 *
 * NOTE(review): the five buffer parameters below are size_t variables
 * registered with type 'int'. The two differ in size on 64-bit builds;
 * confirm the intended type.
 */
module_param(btsbuf_size, int, 0);
MODULE_PARM_DESC(btsbuf_size, "BTS buffer size.");
module_param(bts_int_margin_recs, int, 0);
MODULE_PARM_DESC(bts_int_margin_recs, "BTS buffer int margin records.");
module_param(subbuf_size, int, 0);
MODULE_PARM_DESC(subbuf_size, "relayfs buffer size.");
module_param(subbuf_num, int, 0);
MODULE_PARM_DESC(subbuf_num, "relayfs buffer number.");
module_param(subbuf_sleep_threshold, int, 0);
MODULE_PARM_DESC(subbuf_sleep_threshold,
		 "target process sleep threshold of relayfs buffers.");
/* The permission is 0 like every other parameter: PID_MAX_DEFAULT is a
 * pid count, not a permission mask, and module_param() has no slot for a
 * default value.
 * NOTE(review): pid_max has no initializer, so it is 0 unless given at
 * load time; confirm the loader always passes it.
 */
module_param(pid_max, int, 0);
MODULE_PARM_DESC(pid_max, "max pid.");
module_param(mode, int, 0);
MODULE_PARM_DESC(mode, "trace mode.");
/* bt_mod_init() fails with -EINVAL when any of these four is zero. */
module_param(irq_desc_p, ulong, 0);
module_param(no_irq_type_p, ulong, 0);
module_param(switch_to_addr, ulong, 0);
module_param(switch_to_size, int, 0);

module_param(chk_sctime, int, 0);

MODULE_AUTHOR("Yumiko Sugita <sugita@sdl.hitachi.co.jp>,\n" \
	      "\t\tSatoshi Fujiwara <sa-fuji@sdl.hitachi.co.jp>");
MODULE_DESCRIPTION("Hardware BTS facility controller");
MODULE_LICENSE("GPL");
