#include <stdio.h>
#include "hardmeter.h"

/* Sample-buffer capacities (ibuffer_size values) for the EBS and PEBS templates. */
enum {
	EBS_MAX_SIZE  = 524270,	/* no check yet. */
	PEBS_MAX_SIZE = 52427
};

/*
 * Counter Index.
 * These values are placed in pmc_map[] by the control templates in this
 * file, always OR'ed with P4_FAST_RDPMC.
 */
enum {
	P4_BPU_CCCR0   = 0,
	P4_BPU_CCCR1   = 1,
	P4_BPU_CCCR2   = 2,
	P4_BPU_CCCR3   = 3,
	P4_MS_CCCR0    = 4,
	P4_MS_CCCR1    = 5,
	P4_MS_CCCR2    = 6,
	P4_MS_CCCR3    = 7,
	P4_FLAME_CCCR0 = 8,
	P4_FLAME_CCCR1 = 9,
	P4_FLAME_CCCR2 = 10,
	P4_FLAME_CCCR3 = 11,
	P4_IQ_CCCR0    = 12,
	P4_IQ_CCCR1    = 13,
	P4_IQ_CCCR2    = 14,
	P4_IQ_CCCR3    = 15,
	P4_IQ_CCCR4    = 16,
	P4_IQ_CCCR5    = 17
};

/* Bit 31; kept as a macro because the value does not fit in an int enumerator. */
#define P4_FAST_RDPMC		0x80000000

/* member of CCCR -- spelled as bit positions */
#define P4_CCCR_CASCADE		(1 << 30)
#define P4_CCCR_OVF_PMI_T1	(1 << 27)
#define P4_CCCR_OVF_PMI_T0	(1 << 26)
#define P4_CCCR_ACTIVE_THREAD	(3 << 16)	/* bits 16 and 17 both set */
#define P4_CCCR_ESCR_SELECT(x)	((x) << 13)
#define P4_CCCR_ENABLE		(1 << 12)

/* member of ESCR -- single-bit flags spelled as bit positions */
#define P4_ESCR_EVENT_SELECT(x)	((x) << 25)
#define P4_ESCR_EVENT_MASK_BIT(x)	(1 << ((x) + 9))
#define P4_ESCR_TAG_VALUE3	(1 << 8)
#define P4_ESCR_TAG_VALUE2	(1 << 7)
#define P4_ESCR_TAG_VALUE1	(1 << 6)
#define P4_ESCR_TAG_VALUE0	(1 << 5)
#define P4_ESCR_TAG_ENABLE	(1 << 4)
#define P4_ESCR_T0_OS		(1 << 3)
#define P4_ESCR_T0_USR		(1 << 2)
#define P4_ESCR_T1_OS		(1 << 1)
#define P4_ESCR_T1_USR		(1 << 0)

/*
 * Event-mask list offering just "nbogus" and "bogus".  Referenced by the
 * uop_retired, cache-miss and DTLB-miss rows of hardmeter_p4_template[].
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default} -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_nbogus_bogus[] = {
	{"nbogus", "The marked uops are not bogus.",
	 0, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"bogus", "The marked uops are bogus.",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for the instr_retired row of hardmeter_p4_template[]:
 * the four bogus/non-bogus x tagged/untagged combinations on mask bits 0-3.
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default} -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_instr_retired[] = {
	{"nbogusntag", "Non-bogus instructions that are not tagged.",
	 0, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"nbogustag", "Non-bogus instructions that are tagged.",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 1},
	{"bogusntag", "Bogus instructions that are not tagged.",
	 0, P4_ESCR_EVENT_MASK_BIT(2), 0},
	{"bogustag", "Bogus instructions that are tagged.",
	 0, P4_ESCR_EVENT_MASK_BIT(3), 0},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for branch types, on mask bits 1-4.  No row of
 * hardmeter_p4_template[] visible in this file references it.
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default} -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_retired_branch_type[] = {
	{"conditional", "Conditional jumps.",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"call", "Indirect call branches.",
	 0, P4_ESCR_EVENT_MASK_BIT(2), 0},
	{"return", "Return branches.",
	 0, P4_ESCR_EVENT_MASK_BIT(3), 0},
	{"indirect", "Returns, indirect calls or indirect jumps.",
	 0, P4_ESCR_EVENT_MASK_BIT(4), 0},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for the memory_loads row of hardmeter_p4_template[],
 * which pairs it with p4_pebs_front_end_event.  Of tagloads/tagstores only
 * tagloads carries a 1 in the last column here.
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_memory_loads[] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"tagloads", "The uop is a load operation.",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 1},
	{"tagstores", "The uop is a store operation.",
	 0, P4_ESCR_EVENT_MASK_BIT(2), 0},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for the memory_stores row of hardmeter_p4_template[],
 * which pairs it with p4_pebs_front_end_event.  Of tagloads/tagstores only
 * tagstores carries a 1 in the last column here.
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_memory_stores[] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"tagloads", "The uop is a load operation.",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"tagstores", "The uop is a store operation.",
	 0, P4_ESCR_EVENT_MASK_BIT(2), 1},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for the memory_moves row of hardmeter_p4_template[],
 * which pairs it with p4_pebs_front_end_event.  Both tagloads and tagstores
 * carry a 1 in the last column here.
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_memory_moves[] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"tagloads", "The uop is a load operation.",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 1},
	{"tagstores", "The uop is a store operation.",
	 0, P4_ESCR_EVENT_MASK_BIT(2), 1},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list shared by the *_retired execution-tagging rows of
 * hardmeter_p4_template[] (packed/scaler SP/DP, 64/128bit mmx, x87_fp).
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_execution_tagging[] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0) /* NOBOGUS0 */, 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(4) /* BOGUS0 */, 0},
	/* Description previously read "operationg"; typo corrected. */
	{"all", "Count all uops operating.",
	 0, P4_ESCR_EVENT_MASK_BIT(15), 1},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for the x87_simd_memory_moves_retired row of
 * hardmeter_p4_template[].
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_x87_simd_memory_moves_retired [] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0) /* NOBOGUS0 */, 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(4) /* BOGUS0 */, 0},
	{"allp0", "Count all x87/SIMD store/moves uops.",
	 0, P4_ESCR_EVENT_MASK_BIT(3), 1},
	{"allp2", "Count all x87/SIMD load uops.",
	 0, P4_ESCR_EVENT_MASK_BIT(4), 1},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for the mob_load_replay_retired row of
 * hardmeter_p4_template[]: nbogus/bogus plus four replay reasons.
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_mod_load_replay[] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"no_sta", "replayed because of unknown store address.",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"no_std", "replayed because of unknown store data.",
	 0, P4_ESCR_EVENT_MASK_BIT(3), 0},
	{"partial_data", "replayed because of partially overlapped data access between the load and store operations.",
	 0, P4_ESCR_EVENT_MASK_BIT(4), 1},
	/* Description previously read "liner address"; typo corrected. */
	{"unalgn_addr", "replayed because the lower 4 bits of the linear address do not match between the load and store operations.",
	 0, P4_ESCR_EVENT_MASK_BIT(5), 1},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for the split_load_retired row of
 * hardmeter_p4_template[].
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_split_load_retired[] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"split_ld", "Split load",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 1},
	{NULL, NULL, 0, 0, 0},
};

/*
 * Event-mask list for split_store_retired.  Its only reference in this file
 * is the template row that is compiled out with #if 0.
 * The {NULL, ...} row ends the list.
 * NOTE(review): rows are positional; the fields look like
 * {name, description, position, val, is_default}, with position presumably
 * selecting the evntsel_aux[] slot -- confirm in hardmeter.h.
 */
static const hardmeter_event_mask_t p4_eventmask_split_store_retired[] = {
	{"nbogus", "The marked uops are not bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(0), 1},
	{"bogus", "The marked uops are bogus.",
	 1, P4_ESCR_EVENT_MASK_BIT(1), 0},
	{"split_st", "Split store",
	 0, P4_ESCR_EVENT_MASK_BIT(1), 1},
	{NULL, NULL, 0, 0, 0},
};

/********************** instr_retired **********************/
/*
 * Counter, CCCR and ESCR values for instr_retired.  Every replacement list
 * is parenthesized so the macro behaves as a single operand even when it is
 * combined with operators that bind tighter than '|'.
 */
#define INSTR_RETIRED_PMC	(P4_IQ_CCCR4 | P4_FAST_RDPMC)
#define INSTR_RETIRED_CCCR	(P4_CCCR_OVF_PMI_T0 | P4_CCCR_ACTIVE_THREAD | \
				 P4_CCCR_ESCR_SELECT(4) | P4_CCCR_ENABLE)
#define INSTR_RETIRED_ESCR	(P4_ESCR_EVENT_SELECT(0x2))

/*
 * Control template for the "instr_retired" row of hardmeter_p4_template[].
 * A single counter (nractrs + nrictrs == 1) occupies slot 0.
 */
const struct vperfctr_control p4_ebs_instr_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 0,
		.nrictrs = 1,
		.pmc_map     = { INSTR_RETIRED_PMC },
		.evntsel     = { INSTR_RETIRED_CCCR },
		.evntsel_aux = { INSTR_RETIRED_ESCR },
		.ibuffer_size = 524270,	/* same value as EBS_MAX_SIZE */
	},
};

/********************** uop_retired **********************/
/*
 * Counter, CCCR and ESCR values for uop_retired.  Every replacement list is
 * parenthesized so the macro behaves as a single operand even when it is
 * combined with operators that bind tighter than '|'.
 */
#define UOP_RETIRED_PMC		(P4_IQ_CCCR4 | P4_FAST_RDPMC)
#define UOP_RETIRED_CCCR	(P4_CCCR_OVF_PMI_T0 | P4_CCCR_ACTIVE_THREAD | \
				 P4_CCCR_ESCR_SELECT(4) | P4_CCCR_ENABLE)
#define UOP_RETIRED_ESCR	(P4_ESCR_EVENT_SELECT(0x1))

/*
 * Control template for the "uop_retired" row of hardmeter_p4_template[].
 * A single counter (nractrs + nrictrs == 1) occupies slot 0.
 */
const struct vperfctr_control p4_ebs_uop_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 0,
		.nrictrs = 1,
		.pmc_map     = { UOP_RETIRED_PMC },
		.evntsel     = { UOP_RETIRED_CCCR },
		.evntsel_aux = { UOP_RETIRED_ESCR },
		.ibuffer_size = 524270,	/* same value as EBS_MAX_SIZE */
	},
};

/********************** front-end tagging  **********************/
/*
 * Replacement lists below are parenthesized so each macro behaves as a
 * single operand even next to operators that bind tighter than '|'.
 */
/* front_end_event */
#define FRONT_END_EVENT_PMC	(P4_IQ_CCCR4 | P4_FAST_RDPMC)
#define FRONT_END_EVENT_CCCR	(P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(5) | P4_CCCR_ENABLE)
#define FRONT_END_EVENT_ESCR	(P4_ESCR_EVENT_SELECT(0x8))

/* uop_type */
#define UOP_TYPE_PMC		(P4_IQ_CCCR0 | P4_FAST_RDPMC)
#define UOP_TYPE_CCCR		(P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(2) | P4_CCCR_ENABLE)
#define UOP_TYPE_ESCR		(P4_ESCR_EVENT_SELECT(0x2))

/*
 * Control template shared by the memory_loads, memory_stores and
 * memory_moves rows of hardmeter_p4_template[].  Slot 0 holds the UOP_TYPE_*
 * settings, slot 1 the FRONT_END_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_front_end_event = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			UOP_TYPE_PMC,		/* slot 0 */
			FRONT_END_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			UOP_TYPE_CCCR,
			FRONT_END_EVENT_CCCR,
		},
		.evntsel_aux = {
			UOP_TYPE_ESCR,
			FRONT_END_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/********************** execution tagging **********************/
/*
 * Replacement lists below are parenthesized so each macro behaves as a
 * single operand even next to operators that bind tighter than '|'.
 */
/* execution_event */
#define EXECUTION_EVENT_PMC	(P4_IQ_CCCR4 | P4_FAST_RDPMC)
#define EXECUTION_EVENT_CCCR	(P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(5) | P4_CCCR_ENABLE)
#define EXECUTION_EVENT_ESCR	(P4_ESCR_EVENT_SELECT(0xC))

/* XXX_uop settings shared by packed_SP_uop, packed_DP_uop, scaler_SP_uop,
   scaler_DP_uop, 64bit_mmx_uop, 128bit_mmx_uop and x87_fp_uop; everything
   except the event select, which each user ORs into XXX_UOP_ESCR. */
#define XXX_UOP_PMC		(P4_FLAME_CCCR0 | P4_FAST_RDPMC)
#define XXX_UOP_CCCR		(P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(1) | P4_CCCR_ENABLE)
#define XXX_UOP_ESCR		(P4_ESCR_TAG_VALUE0 | P4_ESCR_TAG_ENABLE)

/*
 * Control template for the "packed_sp_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the XXX_UOP_* settings with event
 * select 0x08, slot 1 the EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_packed_sp_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x08),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "packed_dp_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the XXX_UOP_* settings with event
 * select 0x0C, slot 1 the EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_packed_dp_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x0C),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "scaler_sp_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the XXX_UOP_* settings with event
 * select 0x0A, slot 1 the EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_scaler_sp_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x0A),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "scaler_dp_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the XXX_UOP_* settings with event
 * select 0x0E, slot 1 the EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_scaler_dp_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x0E),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "64bit_mmx_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the XXX_UOP_* settings with event
 * select 0x02, slot 1 the EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_64bit_mmx_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x02),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "128bit_mmx_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the XXX_UOP_* settings with event
 * select 0x1A, slot 1 the EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_128bit_mmx_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x1A),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "x87_fp_retired" row of hardmeter_p4_template[].
 * Slot 0 holds the XXX_UOP_* settings with event select 0x04, slot 1 the
 * EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_x87_fp_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x04),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "x87_simd_memory_moves_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the XXX_UOP_* settings with event
 * select 0x2E, slot 1 the EXECUTION_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_x87_simd_memory_moves_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			XXX_UOP_PMC,		/* slot 0 */
			EXECUTION_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			XXX_UOP_CCCR,
			EXECUTION_EVENT_CCCR,
		},
		.evntsel_aux = {
			XXX_UOP_ESCR | P4_ESCR_EVENT_SELECT(0x2E),
			EXECUTION_EVENT_ESCR,
		},
		.p4 = {
			.pebs_enable = 1u << 25,	/* 0x02000000 */
			.pebs_matrix_vert = 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/********************** replay tagging **********************/
/*
 * replay_event.  Replacement lists are parenthesized so each macro behaves
 * as a single operand even next to operators that bind tighter than '|'.
 */
#define REPLAY_EVENT_PMC	(P4_IQ_CCCR4 | P4_FAST_RDPMC)
#define REPLAY_EVENT_CCCR	(P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(5) | P4_CCCR_ENABLE)
#define REPLAY_EVENT_ESCR	(P4_ESCR_EVENT_SELECT(0x9))

/*
 * Control template for the "l1_cache_miss" row of hardmeter_p4_template[].
 * A single counter (slot 0) carries the REPLAY_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_l1_cache_load_miss_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 0,
		.nrictrs = 1,
		.pmc_map     = { REPLAY_EVENT_PMC },
		.evntsel     = { REPLAY_EVENT_CCCR },
		.evntsel_aux = { REPLAY_EVENT_ESCR },
		.p4 = {
			/* 0x03000001: bits 25, 24 and 0 */
			.pebs_enable = (1u << 25) | (1u << 24) | (1u << 0),
			/* 0x00000001: bit 0 */
			.pebs_matrix_vert = 1u << 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "l2_cache_miss" row of hardmeter_p4_template[].
 * A single counter (slot 0) carries the REPLAY_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_l2_cache_load_miss_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 0,
		.nrictrs = 1,
		.pmc_map     = { REPLAY_EVENT_PMC },
		.evntsel     = { REPLAY_EVENT_CCCR },
		.evntsel_aux = { REPLAY_EVENT_ESCR },
		.p4 = {
			/* 0x03000002: bits 25, 24 and 1 */
			.pebs_enable = (1u << 25) | (1u << 24) | (1u << 1),
			/* 0x00000001: bit 0 */
			.pebs_matrix_vert = 1u << 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "dtlb_load_miss" row of hardmeter_p4_template[].
 * A single counter (slot 0) carries the REPLAY_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_dtlb_load_miss_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 0,
		.nrictrs = 1,
		.pmc_map     = { REPLAY_EVENT_PMC },
		.evntsel     = { REPLAY_EVENT_CCCR },
		.evntsel_aux = { REPLAY_EVENT_ESCR },
		.p4 = {
			/* 0x03000004: bits 25, 24 and 2 */
			.pebs_enable = (1u << 25) | (1u << 24) | (1u << 2),
			/* 0x00000001: bit 0 */
			.pebs_matrix_vert = 1u << 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "dtlb_stor_miss" row of hardmeter_p4_template[].
 * A single counter (slot 0) carries the REPLAY_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_dtlb_store_miss_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 0,
		.nrictrs = 1,
		.pmc_map     = { REPLAY_EVENT_PMC },
		.evntsel     = { REPLAY_EVENT_CCCR },
		.evntsel_aux = { REPLAY_EVENT_ESCR },
		.p4 = {
			/* 0x03000004: bits 25, 24 and 2 */
			.pebs_enable = (1u << 25) | (1u << 24) | (1u << 2),
			/* 0x00000002: bit 1 */
			.pebs_matrix_vert = 1u << 1,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Control template for the "dtlb_all_miss" row of hardmeter_p4_template[].
 * A single counter (slot 0) carries the REPLAY_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_dtlb_all_miss_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 0,
		.nrictrs = 1,
		.pmc_map     = { REPLAY_EVENT_PMC },
		.evntsel     = { REPLAY_EVENT_CCCR },
		.evntsel_aux = { REPLAY_EVENT_ESCR },
		.p4 = {
			/* 0x03000004: bits 25, 24 and 2 */
			.pebs_enable = (1u << 25) | (1u << 24) | (1u << 2),
			/* 0x00000003: bits 1 and 0 */
			.pebs_matrix_vert = (1u << 1) | (1u << 0),
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Counter, CCCR and ESCR values for MOB load replay.  Replacement lists are
 * parenthesized so each macro behaves as a single operand even next to
 * operators that bind tighter than '|'.
 */
#define MOB_LOAD_REPLAY_PMC	(P4_BPU_CCCR0 | P4_FAST_RDPMC)
#define MOB_LOAD_REPLAY_CCCR	(P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(2) | P4_CCCR_ENABLE)
#define MOB_LOAD_REPLAY_ESCR	(P4_ESCR_EVENT_SELECT(0x3))

/*
 * Control template for the "mob_load_replay_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the MOB_LOAD_REPLAY_* settings,
 * slot 1 the REPLAY_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_mod_load_replay_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			MOB_LOAD_REPLAY_PMC,	/* slot 0 */
			REPLAY_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			MOB_LOAD_REPLAY_CCCR,
			REPLAY_EVENT_CCCR,
		},
		.evntsel_aux = {
			MOB_LOAD_REPLAY_ESCR,
			REPLAY_EVENT_ESCR,
		},
		.p4 = {
			/* 0x03000200: bits 25, 24 and 9 */
			.pebs_enable = (1u << 25) | (1u << 24) | (1u << 9),
			/* 0x00000001: bit 0 */
			.pebs_matrix_vert = 1u << 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

/*
 * Counter, CCCR and ESCR values for load-port replay.  Replacement lists are
 * parenthesized so each macro behaves as a single operand even next to
 * operators that bind tighter than '|'.
 */
#define LOAD_PORT_REPLAY_PMC	(P4_FLAME_CCCR2 | P4_FAST_RDPMC)
#define LOAD_PORT_REPLAY_CCCR	(P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(2) | P4_CCCR_ENABLE)
#define LOAD_PORT_REPLAY_ESCR	(P4_ESCR_EVENT_SELECT(0x4))

/*
 * Control template for the "split_load_retired" row of
 * hardmeter_p4_template[].  Slot 0 holds the LOAD_PORT_REPLAY_* settings,
 * slot 1 the REPLAY_EVENT_* settings.
 */
static const struct vperfctr_control p4_pebs_split_load_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on  = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {
			LOAD_PORT_REPLAY_PMC,	/* slot 0 */
			REPLAY_EVENT_PMC,	/* slot 1 */
		},
		.evntsel = {
			LOAD_PORT_REPLAY_CCCR,
			REPLAY_EVENT_CCCR,
		},
		.evntsel_aux = {
			LOAD_PORT_REPLAY_ESCR,
			REPLAY_EVENT_ESCR,
		},
		.p4 = {
			/* 0x03000400: bits 25, 24 and 10 */
			.pebs_enable = (1u << 25) | (1u << 24) | (1u << 10),
			/* 0x00000001: bit 0 */
			.pebs_matrix_vert = 1u << 0,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};

#if 0
/*
 * Compiled out: control template for split_store_retired.
 * NOTE(review): STORE_PORT_REPLAY_PMC is defined twice below with different
 * values, one for each of the two table notes quoted in the comments.  The
 * conflict has to be settled (one definition removed) before this section
 * can be enabled.
 */
/* Table A-5 said: Select store_port_replay event with the MSR_SAAT_ESCR0... */
#define STORE_PORT_REPLAY_PMC	P4_FLAME_CCCR0 | P4_FAST_RDPMC
/* Table A-1 said: Must use ESCR1 for at-retirement counting. */
#define STORE_PORT_REPLAY_PMC	P4_FLAME_CCCR2 | P4_FAST_RDPMC

#define STORE_PORT_REPLAY_CCCR	P4_CCCR_ACTIVE_THREAD | P4_CCCR_ESCR_SELECT(2) | P4_CCCR_ENABLE
#define STORE_PORT_REPLAY_ESCR	P4_ESCR_EVENT_SELECT(0x5)

/* Slot 0: STORE_PORT_REPLAY_* settings; slot 1: REPLAY_EVENT_* settings. */
static const struct vperfctr_control p4_pebs_split_store_retired = {
	.si_signo = 0,
	.cpu_control = {
		.tsc_on = 1,
		.nractrs = 1,
		.nrictrs = 1,
		.pmc_map = {STORE_PORT_REPLAY_PMC, REPLAY_EVENT_PMC, },
		.evntsel = {STORE_PORT_REPLAY_CCCR, REPLAY_EVENT_CCCR, },
		.evntsel_aux = {STORE_PORT_REPLAY_ESCR, REPLAY_EVENT_ESCR, },
		.p4 = {
			.pebs_enable = 0x03000400,
			.pebs_matrix_vert = 0x00000002,
		},
		.ibuffer_size = PEBS_MAX_SIZE,
	},
};
#endif

hardmeter_template_t hardmeter_p4_template[] = {
	/* imprecise at-retirement event */
	{"imprecise at-retirement event", NULL, NULL, NULL, 0},
	{"instr_retired", "instruction retired",
	 &p4_ebs_instr_retired, p4_eventmask_instr_retired, 0},
	{"uop_retired", "uops retired",
	 &p4_ebs_uop_retired, p4_eventmask_nbogus_bogus, 0},
	/* precise front-end event */
	{"precise front-end event", NULL, NULL, NULL, 0},
	{"memory_loads", "memory loads",
	 &p4_pebs_front_end_event, p4_eventmask_memory_loads, 1},
	{"memory_stores", "memory stores",
	 &p4_pebs_front_end_event, p4_eventmask_memory_stores, 1},
	{"memory_moves", "memory loads and stores",
	 &p4_pebs_front_end_event, p4_eventmask_memory_moves, 1},
	/* precise execution event */
	{"precise execution event", NULL, NULL, NULL, 0},
	{"packed_sp_retired", "packed single-precision uop retired",
	 &p4_pebs_packed_sp_retired, p4_eventmask_execution_tagging, 1},
	{"packed_dp_retired", "packed double-precision uop retired",
	 &p4_pebs_packed_dp_retired, p4_eventmask_execution_tagging, 1},
	{"scaler_sp_retired", "scaler single-precision uop retired",
	 &p4_pebs_scaler_sp_retired, p4_eventmask_execution_tagging, 1},
	{"scaler_dp_retired", "scaler double-precision uop retired",
	 &p4_pebs_scaler_dp_retired, p4_eventmask_execution_tagging, 1},
	{"64bit_mmx_retired", "64bit SIMD integer uop retired",
	 &p4_pebs_64bit_mmx_retired, p4_eventmask_execution_tagging, 1},
	{"128bit_mmx_retired", "128bit SIMD integer uop retired",
	 &p4_pebs_128bit_mmx_retired, p4_eventmask_execution_tagging, 1},
	{"x87_fp_retired", "floating point instruction retired",
	 &p4_pebs_x87_fp_retired, p4_eventmask_execution_tagging, 1},
	{"x87_simd_memory_moves_retired", "x87/SIMD store/moves/load uop retired",
	 &p4_pebs_x87_simd_memory_moves_retired, p4_eventmask_x87_simd_memory_moves_retired, 1},
	/* precise replay event */
	{"precise replay event", NULL, NULL, NULL, 0},
	{"l1_cache_miss", "1st level cache load miss",
	 &p4_pebs_l1_cache_load_miss_retired, p4_eventmask_nbogus_bogus, 1},
	{"l2_cache_miss", "2nd level cache load miss",
	 &p4_pebs_l2_cache_load_miss_retired, p4_eventmask_nbogus_bogus, 1},
	{"dtlb_load_miss", "DTLB load miss",
	 &p4_pebs_dtlb_load_miss_retired, p4_eventmask_nbogus_bogus, 1},
	{"dtlb_stor_miss", "DTLB store miss",
	 &p4_pebs_dtlb_store_miss_retired, p4_eventmask_nbogus_bogus, 1},
	{"dtlb_all_miss", "DTLB load and store miss",
	 &p4_pebs_dtlb_all_miss_retired, p4_eventmask_nbogus_bogus, 1},
	{"mob_load_replay_retired","MOB(memory order buffer) causes load replay",
	 &p4_pebs_mod_load_replay_retired, p4_eventmask_mod_load_replay, 1},
	{"split_load_retired", "replayed events at the load port.",
	 &p4_pebs_split_load_retired, p4_eventmask_split_load_retired, 1},
#if 0
	{"split_store_retired", "replayed events at the store port.",
	 &p4_pebs_split_store_retired, p4_eventmask_split_store_retired, 1},
#endif
	{NULL, NULL, NULL, 0},
};

/*
 * Fill *dest with the control settings for the event selected in *opt.
 *
 * The template's control is copied into *dest and then adjusted:
 *  - P4_ESCR_T0_USR and/or P4_ESCR_T0_OS (per opt->user / opt->kernel) are
 *    OR'ed into evntsel_aux[] of every counter in use;
 *  - ireset[] of the counter at index nractrs is set to -opt->interval;
 *  - ibuffer_size is replaced by opt->count when that is non-negative;
 *  - the value of every event-mask entry marked is_default is OR'ed into
 *    evntsel_aux[entry->position].
 *
 * Returns 0 on success.  Returns -1, leaving *dest untouched, when dest,
 * opt, opt->template or the template's control is NULL, when neither user
 * nor kernel is requested, or when opt->interval is not positive.
 *
 * NOTE(review): opt->count is not range-checked against the template's
 * default ibuffer_size.
 */
int hardmeter_p4_fill_control(struct vperfctr_control *dest, const hardmeter_option_t *opt)
{
	int i;
	int nctr;
	unsigned int bits = 0;
	const hardmeter_event_mask_t *e;

	if (dest == NULL || opt == NULL)
		return -1;
	/*
	 * Section-heading rows of hardmeter_p4_template[] have a NULL control;
	 * copying from one would dereference a null pointer.
	 */
	if (opt->template == NULL || opt->template->control == NULL)
		return -1;

	if (opt->user)
		bits |= P4_ESCR_T0_USR;
	if (opt->kernel)
		bits |= P4_ESCR_T0_OS;
	if (!bits)
		return -1;
	if (opt->interval <= 0)
		return -1;

	*dest = *(opt->template->control);
	nctr = dest->cpu_control.nractrs + dest->cpu_control.nrictrs;
	for (i = 0; i < nctr; i++) {
		dest->cpu_control.evntsel_aux[i] |= bits;
	}
	dest->cpu_control.ireset[dest->cpu_control.nractrs] = -(opt->interval);
	if (opt->count >= 0) {
		dest->cpu_control.ibuffer_size = opt->count;
	}
	if (opt->template->eventmask) {
		for (e = opt->template->eventmask; e->name != NULL; e++) {
			if (e->is_default) {
				dest->cpu_control.evntsel_aux[e->position] |= e->val;
			}
		}
	}
	return 0;
}
