ipsw/pkg/ctf/types.go

package ctf

//go:generate go tool stringer -type=kind,floatEncoding -output types_string.go

import (
	"encoding/json"
	"fmt"
	"math"
	"strings"
)

/*
 * CTF - Compact ANSI-C Type Format
 *
 * This file format can be used to compactly represent the information needed
 * by a debugger to interpret the ANSI-C types used by a given program.
 * Traditionally, this kind of information is generated by the compiler when
 * invoked with the -g flag and is stored in "stabs" strings or in the more
 * modern DWARF format.  CTF provides a representation of only the information
 * that is relevant to debugging a complex, optimized C program such as the
 * operating system kernel in a form that is significantly more compact than
 * the equivalent stabs or DWARF representation.  The format is data-model
 * independent, so consumers do not need different code depending on whether
 * they are 32-bit or 64-bit programs.  CTF assumes that a standard ELF symbol
 * table is available for use in the debugger, and uses the structure and data
 * of the symbol table to avoid storing redundant information.  The CTF data
 * may be compressed on disk or in memory, indicated by a bit in the header.
 * CTF may be interpreted in a raw disk file, or it may be stored in an ELF
 * section, typically named .SUNW_ctf.  Data structures are aligned so that
 * a raw CTF file or CTF ELF section may be manipulated using mmap(2).
 *
 * The CTF file or section itself has the following structure:
 *
 * +--------+--------+---------+----------+-------+--------+
 * |  file  |  type  |  data   | function | data  | string |
 * | header | labels | objects |   info   | types | table  |
 * +--------+--------+---------+----------+-------+--------+
 *
 * The file header stores a magic number and version information, encoding
 * flags, and the byte offset of each of the sections relative to the end of the
 * header itself.  If the CTF data has been uniquified against another set of
 * CTF data, a reference to that data also appears in the header.  This
 * reference is the name of the label corresponding to the types uniquified
 * against.
 *
 * Following the header is a list of labels, used to group the types included in
 * the data types section.  Each label is accompanied by a type ID i.  A given
 * label refers to the group of types whose IDs are in the range [0, i].
 *
 * Data object and function records are stored in the same order as they appear
 * in the corresponding symbol table, except that symbols marked SHN_UNDEF are
 * not stored and symbols that have no type data are padded out with zeroes.
 * For each data object, the type ID (a small integer) is recorded.  For each
 * function, the type ID of the return type and argument types is recorded.
 *
 * The data types section is a list of variable size records that represent each
 * type, in order by their ID.  The types themselves form a directed graph,
 * where each node may contain one or more outgoing edges to other type nodes,
 * denoted by their ID.
 *
 * Strings are recorded as a string table ID (0 or 1) and a byte offset into the
 * string table.  String table 0 is the internal CTF string table.  String table
 * 1 is the external string table, which is the string table associated with the
 * ELF symbol table for this object.  CTF does not record any strings that are
 * already in the symbol table, and the CTF string table does not contain any
 * duplicated strings.
 *
 * If the CTF data has been merged with another parent CTF object, some outgoing
 * edges may refer to type nodes that exist in another CTF object.  The debugger
 * and libctf library are responsible for connecting the appropriate objects
 * together so that the full set of types can be explored and manipulated.
 */

// Format-wide constants: the header magic number, upper bounds on encoded
// values, type-size limits and sentinel, data format version numbers, and
// header flag bits.
const (
	MAGIC       = 0xcff1     /* magic number identifying header */
	MAX_TYPE    = 0xfffffffe /* max type identifier value */
	MAX_NAME    = 0x7fffffff /* max offset into a string table */
	MAX_VLEN    = 0x3ff      /* max struct, union, enum members or args */
	MAX_INTOFF  = 0xff       /* max offset of intrinsic value in bits */
	MAX_INTBITS = 0xffff     /* max size of an intrinsic in bits */
	/* Type size limits; see the ctf_type_t comment on ctftype */
	MAX_SIZE   = 0xfffffffe /* max size of a type in bytes */
	LSIZE_SENT = 0xffffffff /* sentinel for ctt_size */
	MAX_LSIZE  = math.MaxUint64
	/* data format version numbers, followed by header flag bits */
	VERSION_1     = 1
	VERSION_2     = 2
	VERSION_3     = 3
	VERSION_4     = 4
	VERSION       = VERSION_4 /* current version */
	F_COMPRESS    = 0x1       /* data buffer is compressed */
	F_NEWFUNCINFO = 0x2       /* New v3 func info section format.  */
)

// preamble mirrors ctf_preamble: the leading fields of the file header that
// carry the magic number, the data format version and the flag bits.
type preamble struct {
	Magic   uint16 `json:"magic,omitempty"`   /* magic number (MAGIC) */
	Version uint8  `json:"version,omitempty"` /* data format version number (VERSION) */
	Flags   uint8  `json:"flags,omitempty"`   /* flags (see below) */
}

// header_t mirrors ctf_header_t: the preamble followed by string-table
// references for the parent label and parent name, the offsets of the label,
// object, function, type and string sections, and the string section length.
// Per the format description at the top of this file, section offsets are
// relative to the end of the header itself.
type header_t struct {
	Preamble       preamble `json:"preamble"`
	ParentLabelRef uint32   `json:"parent_label_ref,omitempty"` /* ref to name of parent lbl uniq'd against */
	ParentNameRef  uint32   `json:"parent_name_ref,omitempty"`  /* ref to basename of parent */
	LabelOffset    uint32   `json:"label_offset,omitempty"`     /* offset of label section */
	ObjOffset      uint32   `json:"obj_offset,omitempty"`       /* offset of object section */
	FuncOffset     uint32   `json:"func_offset,omitempty"`      /* offset of function section */
	TypeOffset     uint32   `json:"type_offset,omitempty"`      /* offset of type section */
	StrOffset      uint32   `json:"str_offset,omitempty"`       /* offset of string section */
	StrLen         uint32   `json:"str_len,omitempty"`          /* length of string section in bytes */
}

// header pairs the raw header_t with the parent label and parent name held
// as strings.
// NOTE(review): presumably the strings are resolved from ParentLabelRef and
// ParentNameRef by the reader — confirm against the parsing code.
type header struct {
	header_t
	ParentLabel string `json:"parent_label,omitempty"` /* name of parent lbl uniq'd against */
	ParentName  string `json:"parent_name,omitempty"`  /* basename of parent */
}

// lblent mirrors ctf_lblent_t: one entry of the label section, pairing a
// reference to the label's name with the last type index the label covers.
type lblent struct {
	Label     uint32 /* ref to name of label */
	TypeIndex uint32 /* last type associated with this label */
}

// stype mirrors ctf_stype: the short form of a type record.  SizeOrType
// stands in for the C union of ctt_size and ctt_type, so the same 32-bit
// field holds either the size of the type in bytes or a reference to another
// type; the comments on the kind constants say which kinds use ctt_type.
type stype struct {
	Name       uint32 /* reference to name in string table */
	Info       info   /* encoded kind, variant length (see below) */
	SizeOrType uint32 /* UNION {
	uint32_t _size - size of entire type in bytes
	uint32_t _type - reference to another type
	} */
}

// stypeV1 mirrors ctf_stype_v1: the short form of a type record with a
// 16-bit info word (infoV1) and a 16-bit SizeOrType.  SizeOrType stands in
// for the C union of ctt_size and ctt_type, so the same field holds either
// the size of the type in bytes or a reference to another type.
type stypeV1 struct {
	Name       uint32 /* reference to name in string table */
	Info       infoV1 /* encoded kind, variant length (see below) */
	SizeOrType uint16 /* UNION {
	uint16_t _size - size of entire type in bytes
	uint16_t _type - reference to another type
	} */
}

/*
 * ctftype mirrors ctf_type_t, the long form of a type record.
 *
 * Type sizes are measured in bytes and come in two flavors.  Sizes up to
 * MAX_SIZE are stored directly in the ctt_size member of the short form
 * (ctf_stype_t).  Larger sizes are stored in the ctt_lsize member of
 * ctf_type_t, split here into LSizeHI and LSizeLO; use of that member is
 * indicated by the presence of LSIZE_SENT in ctt_size.
 */
type ctftype struct {
	stype
	LSizeHI uint32 /* high 32 bits of type size in bytes */
	LSizeLO uint32 /* low 32 bits of type size in bytes */
}

// LSize returns the 64-bit type size in bytes, with LSizeHI as the upper
// 32 bits and LSizeLO as the lower 32 bits.
func (t ctftype) LSize() uint64 {
	upper := uint64(t.LSizeHI) << 32
	lower := uint64(t.LSizeLO)
	return upper | lower
}

// ctftypeV1 is the long form of a type record built on the stypeV1 short
// form; the large size is split into LSizeHI and LSizeLO.
type ctftypeV1 struct {
	stypeV1
	LSizeHI uint32 /* high 32 bits of type size in bytes */
	LSizeLO uint32 /* low 32 bits of type size in bytes */
}

// LSize returns the 64-bit type size in bytes, with LSizeHI as the upper
// 32 bits and LSizeLO as the lower 32 bits.
func (t ctftypeV1) LSize() uint64 {
	upper := uint64(t.LSizeHI) << 32
	lower := uint64(t.LSizeLO)
	return upper | lower
}

// Info is the method set shared by the ctt_info encodings declared in this
// file; both info (32-bit) and infoV1 (16-bit) provide these methods.
type Info interface {
	// Kind returns the type kind decoded from the info word.
	Kind() kind
	// IsRoot reports whether the isroot bit of the info word is set.
	IsRoot() bool
	// VarLen returns the vlen field: the length of the variable data list.
	VarLen() uint16
	// String returns a human-readable summary of the decoded fields.
	String() string
	// MarshalJSON encodes the decoded fields as a JSON object.
	MarshalJSON() ([]byte, error)
}

/*
 * info is the 32-bit ctt_info word of a type record.  The type kind, the
 * isroot flag and the variable data length are packed into its low 16 bits:
 *
 *             -----------------------------------
 * ctt_info:   | reserved | kind | isroot | vlen |
 *             -----------------------------------
 *             31         15   11    10    9     0
 *
 * kind   = bits 15..11  <-- one of the kind constants (see below)
 * isroot = bit 10
 * vlen   = bits 9..0    <-- length of variable data list
 *
 * Name references (ctt_name and other structures that contain them) are
 * split in a similar way into a string table id (0 or 1) and a string table
 * byte offset; see the name type.
 */
type info uint32

// Kind returns the type kind stored in bits 15..11.
func (i info) Kind() kind {
	return kind((i >> 11) & 0x1f)
}

// IsRoot reports whether the isroot flag (bit 10) is set.
func (i info) IsRoot() bool {
	return i&0x0400 != 0
}

// VarLen returns the vlen field (bits 9..0, masked with MAX_VLEN).
func (i info) VarLen() uint16 {
	return uint16(i & MAX_VLEN)
}

// String formats the decoded kind, root flag and variable length.
func (i info) String() string {
	k, root, vlen := i.Kind(), i.IsRoot(), i.VarLen()
	return fmt.Sprintf("kind: %s, is_root: %t, len: %d", k, root, vlen)
}

// MarshalJSON emits the decoded fields as a JSON object with the keys
// "kind", "is_root" and "var_len"; empty values are omitted.
func (i info) MarshalJSON() ([]byte, error) {
	type decoded struct {
		Kind   string `json:"kind,omitempty"`
		IsRoot bool   `json:"is_root,omitempty"`
		VarLen uint16 `json:"var_len,omitempty"`
	}
	out := decoded{
		Kind:   i.Kind().String(),
		IsRoot: i.IsRoot(),
		VarLen: i.VarLen(),
	}
	return json.Marshal(out)
}

// infoV1 is the 16-bit ctt_info word used by the V1 record layouts
// (stypeV1).  It is decoded with the same masks as info: kind in bits
// 15..11, isroot in bit 10 and vlen in bits 9..0.
type infoV1 uint16

// Kind returns the type kind stored in bits 15..11.
func (i infoV1) Kind() kind {
	return kind((i >> 11) & 0x1f)
}

// IsRoot reports whether the isroot flag (bit 10) is set.
func (i infoV1) IsRoot() bool {
	return i&0x0400 != 0
}

// VarLen returns the vlen field (bits 9..0, masked with MAX_VLEN).
func (i infoV1) VarLen() uint16 {
	return uint16(i & MAX_VLEN)
}

// String formats the decoded kind, root flag and variable length.
func (i infoV1) String() string {
	k, root, vlen := i.Kind(), i.IsRoot(), i.VarLen()
	return fmt.Sprintf("kind: %s, is_root: %t, len: %d", k, root, vlen)
}

// MarshalJSON emits the decoded fields as a JSON object with the keys
// "kind", "is_root" and "var_len"; empty values are omitted.
func (i infoV1) MarshalJSON() ([]byte, error) {
	type decoded struct {
		Kind   string `json:"kind,omitempty"`
		IsRoot bool   `json:"is_root,omitempty"`
		VarLen uint16 `json:"var_len,omitempty"`
	}
	out := decoded{
		Kind:   i.Kind().String(),
		IsRoot: i.IsRoot(),
		VarLen: i.VarLen(),
	}
	return json.Marshal(out)
}

// name is an encoded string reference: the top bit selects the string table
// and the remaining 31 bits are a byte offset into that table.
type name uint32

// StrID returns the string table id held in bit 31 (0 or 1).
func (n name) StrID() uint32 {
	return uint32(n) >> 31
}

// NameOffset returns the string table byte offset held in bits 30..0.
func (n name) NameOffset() uint32 {
	const tableBit = 1 << 31
	return uint32(n) &^ tableBit
}

// kind is the type kind decoded from the kind bits of ctt_info.  Its String
// method is produced by stringer (see the go:generate directive at the top
// of this file).
type kind uint16

const (
	/*
	 * Values for TYPE_KIND().  If the kind has an associated data list,
	 * INFO_VLEN() will extract the number of elements in the list, and
	 * the type of each element is shown in the comments below.
	 */
	UNKNOWN  kind = 0 /* unknown type (used for padding) */
	INTEGER  kind = 1 /* variant data is INT_DATA() (see below) */
	FLOAT    kind = 2 /* variant data is DATA() (see below) */
	POINTER  kind = 3 /* ctt_type is referenced type */
	ARRAY    kind = 4 /* variant data is single ctf_array_t */
	FUNCTION kind = 5 /* ctt_type is return type, variant data is */
	/* list of argument types (uint32_t's) */
	STRUCT   kind = 6  /* variant data is list of ctf_member_t's */
	UNION    kind = 7  /* variant data is list of ctf_member_t's */
	ENUM     kind = 8  /* variant data is list of ctf_enum_t's */
	FORWARD  kind = 9  /* no additional data; ctt_name is tag */
	TYPEDEF  kind = 10 /* ctt_type is referenced type */
	VOLATILE kind = 11 /* ctt_type is base type */
	CONST    kind = 12 /* ctt_type is base type */
	RESTRICT kind = 13 /* ctt_type is base type */

	PTRAUTH kind = 14 /* variant data is PTRAUTH_DATA (see below) */

	MAX kind = 31 /* Maximum possible K_* value */
)

/*
 * intEncoding is the variant data word of an INTEGER type.  The display
 * flags, the offset in bits and the size in bits are packed into a single
 * 32-bit word: flags in bits 31..24, offset in bits 23..16 and size in
 * bits 15..0.
 */
type intEncoding uint32

// Encoding returns the flag byte (bits 31..24) shifted down to the low bits.
func (e intEncoding) Encoding() intEncoding {
	return (e >> 24) & 0xff
}

// Offset returns the offset in bits (bits 23..16).
func (e intEncoding) Offset() uint32 {
	return uint32(e>>16) & 0xff
}

// Bits returns the size in bits (bits 15..0).
func (e intEncoding) Bits() uint32 {
	return uint32(e) & 0xffff
}

// String names the flags set in e, separated by single spaces, in the order
// SIGNED, CHAR, BOOL, VARARGS.  A zero value, or a value with any bit set
// outside those four flags, is rendered as a hexadecimal number instead.
func (e intEncoding) String() string {
	const known = SIGNED | CHAR | BOOL | VARARGS
	if e == 0 || e&^known != 0 {
		return fmt.Sprintf("%#x", uint32(e))
	}
	flags := []struct {
		bit   intEncoding
		label string
	}{
		{SIGNED, "SIGNED"},
		{CHAR, "CHAR"},
		{BOOL, "BOOL"},
		{VARARGS, "VARARGS"},
	}
	set := make([]string, 0, len(flags))
	for _, f := range flags {
		if e&f.bit != 0 {
			set = append(set, f.label)
		}
	}
	return strings.Join(set, " ")
}

// MarshalJSON emits the decoded fields as a JSON object with the keys
// "encoding", "offset" and "bits"; empty values are omitted.
func (e intEncoding) MarshalJSON() ([]byte, error) {
	type decoded struct {
		Encoding string `json:"encoding,omitempty"`
		Offset   uint32 `json:"offset,omitempty"`
		Bits     uint32 `json:"bits,omitempty"`
	}
	out := decoded{
		Encoding: e.Encoding().String(),
		Offset:   e.Offset(),
		Bits:     e.Bits(),
	}
	return json.Marshal(out)
}

// Flag bits found in the value returned by intEncoding.Encoding.
const (
	SIGNED  intEncoding = 0x01 /* integer is signed (otherwise unsigned) */
	CHAR    intEncoding = 0x02 /* character display format */
	BOOL    intEncoding = 0x04 /* boolean display format */
	VARARGS intEncoding = 0x08 /* varargs display format */
)

/*
 * floatEncoding is the variant data word of a FLOAT type.  The encoding,
 * the offset in bits and the size in bits are packed into a single 32-bit
 * word: encoding in bits 31..24, offset in bits 23..16 and size in bits
 * 15..0.  The String method is produced by stringer (see the go:generate
 * directive at the top of this file).
 */
type floatEncoding uint32

// Encoding returns the encoding byte (bits 31..24) shifted down to the low
// bits, where it can be compared with the constants below.
func (e floatEncoding) Encoding() floatEncoding {
	return (e >> 24) & 0xff
}

// Offset returns the offset in bits (bits 23..16).
func (e floatEncoding) Offset() uint32 {
	return uint32(e>>16) & 0xff
}

// Bits returns the size in bits (bits 15..0).
func (e floatEncoding) Bits() uint32 {
	return uint32(e) & 0xffff
}

// MarshalJSON emits the decoded fields as a JSON object with the keys
// "encoding", "offset" and "bits"; empty values are omitted.
func (e floatEncoding) MarshalJSON() ([]byte, error) {
	type decoded struct {
		Encoding string `json:"encoding,omitempty"`
		Offset   uint32 `json:"offset,omitempty"`
		Bits     uint32 `json:"bits,omitempty"`
	}
	out := decoded{
		Encoding: e.Encoding().String(),
		Offset:   e.Offset(),
		Bits:     e.Bits(),
	}
	return json.Marshal(out)
}

// Values of floatEncoding.Encoding.
const (
	SINGLE   floatEncoding = 1  /* IEEE 32-bit float encoding */
	DOUBLE   floatEncoding = 2  /* IEEE 64-bit float encoding */
	CPLX     floatEncoding = 3  /* Complex encoding */
	DCPLX    floatEncoding = 4  /* Double complex encoding */
	LDCPLX   floatEncoding = 5  /* Long double complex encoding */
	LDOUBLE  floatEncoding = 6  /* Long double encoding */
	INTRVL   floatEncoding = 7  /* Interval (2x32-bit) encoding */
	DINTRVL  floatEncoding = 8  /* Double interval (2x64-bit) encoding */
	LDINTRVL floatEncoding = 9  /* Long double interval (2x128-bit) encoding */
	IMAGRY   floatEncoding = 10 /* Imaginary (32-bit) encoding */
	DIMAGRY  floatEncoding = 11 /* Long imaginary (64-bit) encoding */
	LDIMAGRY floatEncoding = 12 /* Long double imaginary (128-bit) encoding */
)

/*
 * ptrAuthData is the variant data word of a PTRAUTH type.  Whether the
 * pointer is discriminated, the key and the discriminator are packed into
 * a single 32-bit word: discriminated in bits 31..24, key in bits 23..16
 * and discriminator in bits 15..0.
 */
type ptrAuthData uint32

// Discriminated reports whether any of bits 31..24 is set.
func (p ptrAuthData) Discriminated() bool {
	return p&0xff000000 != 0
}

// Key returns the name of the key stored in bits 23..16: "IA", "IB", "DA"
// or "DB" for the values 0 through 3, and "ERROR" for anything else.
func (p ptrAuthData) Key() string {
	switch (p >> 16) & 0xff {
	case 0:
		return "IA"
	case 1:
		return "IB"
	case 2:
		return "DA"
	case 3:
		return "DB"
	default:
		return "ERROR"
	}
}

// Discriminator returns the discriminator stored in bits 15..0.
func (p ptrAuthData) Discriminator() uint32 {
	return uint32(p) & 0xffff
}

// MarshalJSON emits the decoded fields as a JSON object with the keys
// "key", "addr_div" and "diversity"; empty values are omitted.
func (p ptrAuthData) MarshalJSON() ([]byte, error) {
	type decoded struct {
		Key       string `json:"key,omitempty"`
		AddrDiv   bool   `json:"addr_div,omitempty"`
		Diversity uint32 `json:"diversity,omitempty"`
	}
	out := decoded{
		Key:       p.Key(),
		AddrDiv:   p.Discriminated(),
		Diversity: p.Discriminator(),
	}
	return json.Marshal(out)
}

// array mirrors ctf_array_t, the variant data of an ARRAY type: references
// to the element type and the index type, plus the number of elements.
type array struct {
	Contents    uint32 `json:"contents,omitempty"`     /* reference to type of array contents */
	Index       uint32 `json:"index,omitempty"`        /* reference to type of array index */
	NumElements uint32 `json:"num_elements,omitempty"` /* number of elements */
}

// arrayV1 mirrors ctf_array_v1_t: the same fields as array, but with 16-bit
// type references for the element type and the index type.
type arrayV1 struct {
	Contents    uint16 `json:"contents,omitempty"`     /* reference to type of array contents */
	Index       uint16 `json:"index,omitempty"`        /* reference to type of array index */
	NumElements uint32 `json:"num_elements,omitempty"` /* number of elements */
}

/*
 * Most structure members have bit offsets that can be expressed using a
 * short.  Some don't.  ctf_member_t is used for structs which cannot
 * contain any of these large offsets, whereas ctf_lmember_t is used in the
 * latter case.  If ctt_size for a given struct is >= 8192 bytes, all members
 * will be stored as type ctf_lmember_t.
 *
 * LSTRUCT_THRESH is that size threshold, in bytes.
 */
const LSTRUCT_THRESH = 8192

// member mirrors ctf_member_t: one struct or union member whose bit offset
// fits in 16 bits.  The trailing blank field reserves two more bytes after
// Offset.
type member struct {
	Name   uint32 /* reference to name in string table */
	Type   uint32 /* reference to type of member */
	Offset uint16 /* offset of this member in bits */
	_      uint16 // padding ?
}

// memberV1 mirrors ctf_member_v1: one struct or union member with a 16-bit
// type reference and a 16-bit offset in bits.
type memberV1 struct {
	Name   uint32 /* reference to name in string table */
	Type   uint16 /* reference to type of member */
	Offset uint16 /* offset of this member in bits */
}

// lmember mirrors ctf_lmember_t: one struct or union member whose bit
// offset is stored as a 64-bit value split into two 32-bit halves.
type lmember struct {
	Name     uint32 /* reference to name in string table */
	Type     uint32 /* reference to type of member */
	OffsetHI uint32 /* high 32 bits of member offset in bits */
	OffsetLO uint32 /* low 32 bits of member offset in bits */
}

// Offset returns the 64-bit member offset in bits, with OffsetHI as the
// upper 32 bits and OffsetLO as the lower 32 bits.
func (l lmember) Offset() uint64 {
	upper := uint64(l.OffsetHI) << 32
	lower := uint64(l.OffsetLO)
	return upper | lower
}

// lmemberV1 mirrors ctf_lmember_v1: one struct or union member with a
// 16-bit type reference, explicit padding, and a bit offset stored as a
// 64-bit value split into two 32-bit halves.
type lmemberV1 struct {
	Name     uint32 /* reference to name in string table */
	Type     uint16 /* reference to type of member */
	Pad      uint16 /* padding */
	OffsetHI uint32 /* high 32 bits of member offset in bits */
	OffsetLO uint32 /* low 32 bits of member offset in bits */
}

// Offset returns the 64-bit member offset in bits, with OffsetHI as the
// upper 32 bits and OffsetLO as the lower 32 bits.
func (l lmemberV1) Offset() uint64 {
	upper := uint64(l.OffsetHI) << 32
	lower := uint64(l.OffsetLO)
	return upper | lower
}

// enum mirrors ctf_enum_t: one enumerator of an ENUM type, pairing a name
// reference with its signed 32-bit value.
type enum struct {
	Name  uint32 /* reference to name in string table */
	Value int32  /* value associated with this name */
}