| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922 |
- #!/usr/bin/python3
- # ELF support functionality for Python.
- # Copyright (C) 2022-2026 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Lesser General Public
- # License as published by the Free Software Foundation; either
- # version 2.1 of the License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public
- # License along with the GNU C Library; if not, see
- # <https://www.gnu.org/licenses/>.
- """Basic ELF parser.
- Use Image.readfile(path) to read an ELF file into memory and begin
- parsing it.
- """
- import collections
- import functools
- import os
- import struct
- import glibcpp
- class _MetaNamedValue(type):
- """Used to set up _NamedValue subclasses."""
- @classmethod
- def __prepare__(metacls, cls, bases, **kwds):
- # Indicates an int-based class. Needed for types like Shn.
- int_based = False
- for base in bases:
- if issubclass(base, int):
- int_based = int
- break
- return dict(by_value={},
- by_name={},
- prefix=None,
- _int_based=int_based)
- def __contains__(self, other):
- return other in self.by_value
- class _NamedValue(metaclass=_MetaNamedValue):
- """Typed, named integer constants.
- Constants have the following instance attributes:
- name: The full name of the constant (e.g., "PT_NULL").
- short_name: The name with of the constant without the prefix ("NULL").
- value: The integer value of the constant.
- The following class attributes are available:
- by_value: A dict mapping integers to constants.
- by_name: A dict mapping strings to constants.
- prefix: A string that is removed from the start of short names, or None.
- """
- def __new__(cls, arg0, arg1=None):
- """Instance creation.
- For the one-argument form, the argument must be a string, an
- int, or an instance of this class. Strings are looked up via
- by_name. Values are looked up via by_value; if value lookup
- fails, a new unnamed instance is returned. Instances of this
- class a re returned as-is.
- The two-argument form expects the name (a string) and the
- value (an integer). A new instance is created in this case.
- The instance is not registered in the by_value/by_name
- dictionaries (but the caller can do that).
- """
- typ0 = type(arg0)
- if arg1 is None:
- if isinstance(typ0, cls):
- # Re-use the existing object.
- return arg0
- if typ0 is int:
- by_value = cls.by_value
- try:
- return by_value[arg0]
- except KeyError:
- # Create a new object of the requested value.
- if cls._int_based:
- result = int.__new__(cls, arg0)
- else:
- result = object.__new__(cls)
- result.value = arg0
- result.name = None
- return result
- if typ0 is str:
- by_name = cls.by_name
- try:
- return by_name[arg0]
- except KeyError:
- raise ValueError('unknown {} constant: {!r}'.format(
- cls.__name__, arg0))
- else:
- # Types for the two-argument form are rigid.
- if typ0 is not str and typ0 is not None:
- raise ValueError('type {} of name {!r} should be str'.format(
- typ0.__name__, arg0))
- if type(arg1) is not int:
- raise ValueError('type {} of value {!r} should be int'.format(
- type(arg1).__name__, arg1))
- # Create a new named constants.
- if cls._int_based:
- result = int.__new__(cls, arg1)
- else:
- result = object.__new__(cls)
- result.value = arg1
- result.name = arg0
- # Set up the short_name attribute.
- prefix = cls.prefix
- if prefix and arg0.startswith(prefix):
- result.short_name = arg0[len(prefix):]
- else:
- result.short_name = arg0
- return result
- def __str__(self):
- name = self.name
- if name:
- return name
- else:
- return str(self.value)
- def __repr__(self):
- name = self.name
- if name:
- return name
- else:
- return '{}({})'.format(self.__class__.__name__, self.value)
- def __setattr__(self, name, value):
- # Prevent modification of the critical attributes once they
- # have been set.
- if name in ('name', 'value', 'short_name') and hasattr(self, name):
- raise AttributeError('can\'t set attribute {}'.format(name))
- object.__setattr__(self, name, value)
@functools.total_ordering
class _TypedConstant(_NamedValue):
    """Base class for integer-valued optionally named constants.

    This type is not an integer type.  Instances compare equal only to
    instances of the same class (or a subclass of it) that carry the
    same value, and hash like their value.
    """

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.value == other.value

    def __lt__(self, other):
        # Strictly less-than: total_ordering derives <=, > and >= from
        # __lt__ and __eq__, so using <= here would make a < a true.
        return isinstance(other, self.__class__) and self.value < other.value

    def __hash__(self):
        return hash(self.value)
class _IntConstant(_NamedValue, int):
    """Base class for optionally named constants that are real ints.

    An instance compares equal to the plain integer of the same value
    and can take part in integer arithmetic.
    """
class _FlagConstant(_TypedConstant, int):
    """Base class for constants that are both typed constants and ints.

    _TypedConstant comes first in the bases, so its comparison and
    hashing methods take precedence over those of int.
    """
def _parse_elf_h():
    """Read ../elf/elf.h and return the evaluated macro constants.

    The header is located relative to this script, tokenized with
    glibcpp.tokenize_c and its macro definitions are evaluated with
    glibcpp.macro_eval.  Diagnostics are printed to standard output.

    Raises IOError if the tokenizer or the macro evaluator reported
    at least one error.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(script_dir, '..', 'elf', 'elf.h')

    class TokenizerReporter:
        """Report tokenizer errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, token, message):
            self.errors += 1
            location = '{}:{}:{}'.format(path, token.line, token.column)
            print('{}: error: {}'.format(location, message))

    class MacroReporter:
        """Report macro errors to standard output."""

        def __init__(self):
            self.errors = 0

        def error(self, line, message):
            self.errors += 1
            print('{}:{}: error: {}'.format(path, line, message))

        def note(self, line, message):
            print('{}:{}: note: {}'.format(path, line, message))

    # Stage 1: tokenize the header text.
    token_reporter = TokenizerReporter()
    with open(path) as inp:
        source = inp.read()
        tokens = glibcpp.tokenize_c(source, token_reporter)
    if token_reporter.errors:
        raise IOError('parse error in elf.h')

    # Stage 2: collect the macro definitions and evaluate them.
    macro_reporter = MacroReporter()
    definitions = glibcpp.macro_definitions(tokens)
    result = glibcpp.macro_eval(definitions, macro_reporter)
    if macro_reporter.errors:
        raise IOError('parse error in elf.h')

    return result


# The parsed constants are only needed while the constant classes
# below are being populated; the parser itself is dropped right away.
_elf_h = _parse_elf_h()
del _parse_elf_h
# Names from elf.h that have already been turned into constants.
_elf_h_processed = set()


def _register_elf_h(cls, prefix=None, skip=(), ranges=False, parent=None):
    """Create constants of type cls for the matching elf.h macros.

    cls: the constant class to populate (by_value, by_name and one
      class attribute per constant).
    prefix: macro name prefix to match; defaults to cls.prefix.
    skip: macro names that must not be registered.  The name
      prefix + 'NUM' is always skipped.
    ranges: if true, prefix + LOOS/HIOS/LOPROC/HIPROC are skipped as
      constants and stored as cls.os_range and cls.proc_range instead.
    parent: optional class that also receives the names (in its by_name
      dict and as attributes); cls inherits its prefix if cls has none.

    Raises ValueError if no prefix is available, if two macros map to
    the same value within cls, or if no new macro matched the prefix.
    """
    prefix = prefix or cls.prefix
    if not prefix:
        raise ValueError('missing prefix for {}'.format(cls.__name__))

    values = cls.by_value
    names = cls.by_name
    seen = _elf_h_processed

    ignored = set(skip)
    ignored.add(prefix + 'NUM')
    if ranges:
        lo_os, hi_os, lo_proc, hi_proc = [
            prefix + suffix
            for suffix in ('LOOS', 'HIOS', 'LOPROC', 'HIPROC')]
        ignored.update((lo_os, hi_os, lo_proc, hi_proc))
        cls.os_range = (_elf_h[lo_os], _elf_h[hi_os])
        cls.proc_range = (_elf_h[lo_proc], _elf_h[hi_proc])

    # Inherit the prefix from the parent if not set.
    if parent and cls.prefix is None and parent.prefix is not None:
        cls.prefix = parent.prefix

    matched = False
    for name, value in _elf_h.items():
        if name in ignored or name in seen or not name.startswith(prefix):
            continue
        matched = True
        seen.add(name)
        if value in values:
            raise ValueError('duplicate value {}: {}, {}'.format(
                value, name, values[value]))
        constant = cls(name, value)
        values[value] = constant
        names[name] = constant
        setattr(cls, name, constant)
        if parent:
            # Make the symbolic name available through the parent as well.
            parent.by_name[name] = constant
            setattr(parent, name, constant)

    if not matched:
        raise ValueError('nothing matched prefix {!r}'.format(prefix))
class ElfClass(_TypedConstant):
    """Word size of an ELF file.  Type of EI_CLASS values."""


_register_elf_h(cls=ElfClass, prefix='ELFCLASS')
class ElfData(_TypedConstant):
    """Endianness of an ELF file.  Type of EI_DATA values."""


_register_elf_h(cls=ElfData, prefix='ELFDATA')
class Machine(_TypedConstant):
    """ELF machine type.  Type of values in the Ehdr.e_machine field."""

    prefix = 'EM_'


# EM_ARC_A5 is deliberately left out of the registered constants.
_register_elf_h(Machine, skip=['EM_ARC_A5'])
class Et(_TypedConstant):
    """ELF file type.  Type of ET_* values and of the Ehdr.e_type field."""

    prefix = 'ET_'


# The OS/processor range bounds end up in Et.os_range / Et.proc_range.
_register_elf_h(Et, ranges=True)
class Shn(_IntConstant):
    """ELF reserved section indices."""

    prefix = 'SHN_'


# Machine-specific constants are registered before the generic ones so
# that the generic registration only picks up the remaining SHN_* names.
class ShnMIPS(Shn):
    """Supplemental SHN_* constants for EM_MIPS."""


_register_elf_h(ShnMIPS, prefix='SHN_MIPS_', parent=Shn)


class ShnPARISC(Shn):
    """Supplemental SHN_* constants for EM_PARISC."""


_register_elf_h(ShnPARISC, prefix='SHN_PARISC_', parent=Shn)

_register_elf_h(Shn, skip=('SHN_LORESERVE', 'SHN_HIRESERVE'), ranges=True)
class Sht(_TypedConstant):
    """ELF section types.  Type of SHT_* values."""

    prefix = 'SHT_'


# Machine-specific constants are registered before the generic ones so
# that the generic registration only picks up the remaining SHT_* names.
class ShtALPHA(Sht):
    """Supplemental SHT_* constants for EM_ALPHA."""


_register_elf_h(ShtALPHA, prefix='SHT_ALPHA_', parent=Sht)


class ShtARC(Sht):
    """Supplemental SHT_* constants for EM_ARC."""


_register_elf_h(ShtARC, prefix='SHT_ARC_', parent=Sht)


class ShtARM(Sht):
    """Supplemental SHT_* constants for EM_ARM."""


_register_elf_h(ShtARM, prefix='SHT_ARM_', parent=Sht)


class ShtCSKY(Sht):
    """Supplemental SHT_* constants for EM_CSKY."""


_register_elf_h(ShtCSKY, prefix='SHT_CSKY_', parent=Sht)


class ShtIA_64(Sht):
    """Supplemental SHT_* constants for EM_IA_64."""


_register_elf_h(ShtIA_64, prefix='SHT_IA_64_', parent=Sht)


class ShtMIPS(Sht):
    """Supplemental SHT_* constants for EM_MIPS."""


_register_elf_h(ShtMIPS, prefix='SHT_MIPS_', parent=Sht)


class ShtPARISC(Sht):
    """Supplemental SHT_* constants for EM_PARISC."""


_register_elf_h(ShtPARISC, prefix='SHT_PARISC_', parent=Sht)


class ShtRISCV(Sht):
    """Supplemental SHT_* constants for EM_RISCV."""


_register_elf_h(ShtRISCV, prefix='SHT_RISCV_', parent=Sht)

_register_elf_h(
    Sht,
    ranges=True,
    skip=('SHT_LOSUNW', 'SHT_HISUNW', 'SHT_LOUSER', 'SHT_HIUSER'))
class Pf(_FlagConstant):
    """Program header flags.  Type of Phdr.p_flags values."""

    prefix = 'PF_'


# Supplemental flags are registered before the generic ones so that the
# generic registration only picks up the remaining PF_* names.
class PfARM(Pf):
    """Supplemental PF_* flags for EM_ARM."""


_register_elf_h(PfARM, prefix='PF_ARM_', parent=Pf)


class PfHP(Pf):
    """Supplemental PF_* flags for HP-UX."""


_register_elf_h(PfHP, prefix='PF_HP_', parent=Pf)


class PfIA_64(Pf):
    """Supplemental PF_* flags for EM_IA_64."""


_register_elf_h(PfIA_64, prefix='PF_IA_64_', parent=Pf)


class PfMIPS(Pf):
    """Supplemental PF_* flags for EM_MIPS."""


_register_elf_h(PfMIPS, prefix='PF_MIPS_', parent=Pf)


class PfPARISC(Pf):
    """Supplemental PF_* flags for EM_PARISC."""


_register_elf_h(PfPARISC, prefix='PF_PARISC_', parent=Pf)

_register_elf_h(Pf, skip=('PF_MASKOS', 'PF_MASKPROC'))
class Shf(_FlagConstant):
    """Section flags.  Type of Shdr.sh_flags values."""

    prefix = 'SHF_'


# Supplemental flags are registered before the generic ones so that the
# generic registration only picks up the remaining SHF_* names.
class ShfALPHA(Shf):
    """Supplemental SHF_* constants for EM_ALPHA."""


_register_elf_h(ShfALPHA, prefix='SHF_ALPHA_', parent=Shf)


class ShfARM(Shf):
    """Supplemental SHF_* constants for EM_ARM."""


_register_elf_h(ShfARM, prefix='SHF_ARM_', parent=Shf)


class ShfIA_64(Shf):
    """Supplemental SHF_* constants for EM_IA_64."""


_register_elf_h(ShfIA_64, prefix='SHF_IA_64_', parent=Shf)


class ShfMIPS(Shf):
    """Supplemental SHF_* constants for EM_MIPS."""


_register_elf_h(ShfMIPS, prefix='SHF_MIPS_', parent=Shf)


class ShfPARISC(Shf):
    """Supplemental SHF_* constants for EM_PARISC."""


_register_elf_h(ShfPARISC, prefix='SHF_PARISC_', parent=Shf)

_register_elf_h(Shf, skip=('SHF_MASKOS', 'SHF_MASKPROC'))
class Stb(_TypedConstant):
    """ELF symbol binding type (STB_* values)."""

    prefix = 'STB_'


# The OS/processor range bounds end up in Stb.os_range / Stb.proc_range.
_register_elf_h(Stb, ranges=True)
class Stt(_TypedConstant):
    """ELF symbol type."""
    prefix = 'STT_'


# The supplemental classes must derive from Stt (symbol types), not from
# Sht (section types): _TypedConstant equality is based on isinstance,
# so a machine-specific symbol type has to be an Stt to compare equal
# to the Stt value parsed out of an st_info field.
class SttARM(Stt):
    """Supplemental STT_* constants for EM_ARM."""


class SttPARISC(Stt):
    """Supplemental STT_* constants for EM_PARISC."""


class SttSPARC(Stt):
    """Supplemental STT_* constants for EM_SPARC."""
    # Placeholder; replaced by the constant object if the registration
    # below finds STT_SPARC_REGISTER in elf.h.
    STT_SPARC_REGISTER = 13


class SttX86_64(Stt):
    """Supplemental STT_* constants for EM_X86_64."""


# Machine-specific constants are registered before the generic ones so
# that the generic registration only picks up the remaining STT_* names.
# SttX86_64 is not registered here.
_register_elf_h(SttARM, prefix='STT_ARM_', parent=Stt)
_register_elf_h(SttPARISC, prefix='STT_PARISC_', parent=Stt)
_register_elf_h(SttSPARC, prefix='STT_SPARC_', parent=Stt)
_register_elf_h(Stt, ranges=True)
class Pt(_TypedConstant):
    """ELF program header types.  Type of Phdr.p_type."""

    prefix = 'PT_'


# Supplemental constants are registered before the generic ones so that
# the generic registration only picks up the remaining PT_* names.
class PtAARCH64(Pt):
    """Supplemental PT_* constants for EM_AARCH64."""


_register_elf_h(PtAARCH64, prefix='PT_AARCH64_', parent=Pt)


class PtARM(Pt):
    """Supplemental PT_* constants for EM_ARM."""


_register_elf_h(PtARM, prefix='PT_ARM_', parent=Pt)


class PtHP(Pt):
    """Supplemental PT_* constants for HP-UX."""


_register_elf_h(PtHP, prefix='PT_HP_', parent=Pt)


class PtIA_64(Pt):
    """Supplemental PT_* constants for EM_IA_64."""


_register_elf_h(PtIA_64, prefix='PT_IA_64_', parent=Pt)


class PtMIPS(Pt):
    """Supplemental PT_* constants for EM_MIPS."""


_register_elf_h(PtMIPS, prefix='PT_MIPS_', parent=Pt)


class PtPARISC(Pt):
    """Supplemental PT_* constants for EM_PARISC."""


_register_elf_h(PtPARISC, prefix='PT_PARISC_', parent=Pt)


class PtRISCV(Pt):
    """Supplemental PT_* constants for EM_RISCV."""


_register_elf_h(PtRISCV, prefix='PT_RISCV_', parent=Pt)

_register_elf_h(Pt, skip=('PT_LOSUNW', 'PT_HISUNW'), ranges=True)
class Dt(_TypedConstant):
    """ELF dynamic segment tags.  Type of Dyn.d_tag."""

    prefix = 'DT_'


# DT_* macro names that are not registered as constants.
_dt_skip = (
    'DT_ENCODING', 'DT_PROCNUM',
    'DT_ADDRRNGLO', 'DT_ADDRRNGHI', 'DT_ADDRNUM',
    'DT_VALRNGLO', 'DT_VALRNGHI', 'DT_VALNUM',
    'DT_VERSIONTAGNUM', 'DT_EXTRANUM',
    'DT_AARCH64_NUM',
    'DT_ALPHA_NUM',
    'DT_IA_64_NUM',
    'DT_MIPS_NUM',
    'DT_PPC_NUM',
    'DT_PPC64_NUM',
    'DT_SPARC_NUM',
    'DT_X86_64_NUM',
)


# Machine-specific tags are registered before the generic ones so that
# the generic registration only picks up the remaining DT_* names.
class DtAARCH64(Dt):
    """Supplemental DT_* constants for EM_AARCH64."""


_register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)


class DtALPHA(Dt):
    """Supplemental DT_* constants for EM_ALPHA."""


_register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)


class DtALTERA_NIOS2(Dt):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2."""


_register_elf_h(DtALTERA_NIOS2, prefix='DT_NIOS2_', skip=_dt_skip, parent=Dt)


class DtIA_64(Dt):
    """Supplemental DT_* constants for EM_IA_64."""


_register_elf_h(DtIA_64, prefix='DT_IA_64_', skip=_dt_skip, parent=Dt)


class DtMIPS(Dt):
    """Supplemental DT_* constants for EM_MIPS."""


_register_elf_h(DtMIPS, prefix='DT_MIPS_', skip=_dt_skip, parent=Dt)


class DtPPC(Dt):
    """Supplemental DT_* constants for EM_PPC."""


_register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)


class DtPPC64(Dt):
    """Supplemental DT_* constants for EM_PPC64."""


_register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)


class DtRISCV(Dt):
    """Supplemental DT_* constants for EM_RISCV."""


_register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)


class DtSPARC(Dt):
    """Supplemental DT_* constants for EM_SPARC."""


_register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)


class DtX86_64(Dt):
    """Supplemental DT_* constants for EM_X86_64."""


_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)

_register_elf_h(Dt, skip=_dt_skip, ranges=True)
del _dt_skip
# Constant extraction is complete; the registration helper and the
# parsed header contents are no longer needed.
del _register_elf_h, _elf_h
class StInfo:
    """ELF symbol binding and type.  Type of the Sym.st_info field.

    Attributes:
      bind: the symbol binding, an Stb.
      type: the symbol type, an Stt.
    """

    def __init__(self, arg0, arg1=None):
        """Create the object from a raw st_info value or from its parts.

        With a single int argument, the binding is taken from the bits
        above the low four and the type from the low four bits.
        Otherwise arg0 is converted to the binding with Stb and arg1
        to the type with Stt.
        """
        if isinstance(arg0, int) and arg1 is None:
            self.bind = Stb(arg0 >> 4)
            self.type = Stt(arg0 & 15)
        else:
            self.bind = Stb(arg0)
            self.type = Stt(arg1)

    def value(self):
        """Returns the raw value for the bind/type combination."""
        # .value is a plain integer attribute of the constants, not a
        # method, so it must not be called.
        return (self.bind.value << 4) | self.type.value
# Type in an ELF file.  Used for deserialization.  unpack converts a
# blob into the parsed object, size is the number of bytes it consumes.
_Layout = collections.namedtuple('_Layout', 'unpack size')


def _define_layouts(baseclass: type, layout32: str, layout64: str,
                    types=None, fields32=None):
    """Assign a layouts dict to baseclass.

    The layouts dict is indexed by (ElfClass, ElfData) pairs, and its
    values are _Layout instances.  One unpack function is generated per
    combination of word size and endianness.

    baseclass: a named tuple class; its _fields give the field order
      of the 64-bit layout (and of the 32-bit one unless fields32 is
      given).
    layout32, layout64: struct format strings without a byte order
      prefix.
    types: optional dict mapping field names to converters that are
      applied to the unpacked values.
    fields32: optional field order of the 32-bit layout; must be a
      permutation of baseclass._fields.

    Raises ValueError if a format does not yield as many values as
    baseclass has fields, if types names an unknown field, or if
    fields32 is not a permutation of the fields.
    """
    # Check that the struct formats yield the right number of components.
    for s in (struct.Struct(layout32), struct.Struct(layout64)):
        example = s.unpack(b' ' * s.size)
        if len(example) != len(baseclass._fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                s.format, len(example), len(baseclass._fields)))

    # Check that field names in types are correct.
    converters = dict(types) if types else {}
    for n in converters:
        if n not in baseclass._fields:
            raise ValueError('{} does not have field {!r}'.format(
                baseclass.__name__, n))

    if fields32 is not None \
       and set(fields32) != set(baseclass._fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, baseclass._fields))

    # Names that are already taken inside the generated functions.
    used_names = {baseclass.__name__}
    used_names.update(baseclass._fields)
    used_names.update(cls.__name__ for cls in converters.values())

    def unique_name(name):
        """Find a name that is not used for a class or field name."""
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')

    indent = ' ' * 4
    field_names = ', '.join(baseclass._fields)

    layouts = {}
    for (bits, elfclass, layout, fields) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, funcsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            # Globals visible to the generated function.
            env = {
                baseclass.__name__: baseclass,
                struct_unpack_name: struct.unpack,
            }
            # Add the type converters.
            for cls in converters.values():
                env[cls.__name__] = cls

            funcname = 'unpack_{}{}{}'.format(
                baseclass.__name__, bits, funcsuffix)
            fmt = structprefix + layout
            unpack_call = '{}({!r}, {})'.format(
                struct_unpack_name, fmt, blob_name)

            lines = ['def {}({}):'.format(funcname, blob_name)]
            if not converters and fields is None:
                # Nothing to convert or reorder: pass the unpacked
                # values straight to the named tuple constructor.
                lines.append('{}return {}(*{})'.format(
                    indent, baseclass.__name__, unpack_call))
            else:
                # Destructuring tuple assignment, using the custom
                # field order if there is one.
                targets = field_names if fields is None else ', '.join(fields)
                lines.append('{}{} = {}'.format(indent, targets, unpack_call))
                # Perform the type conversions.
                for n in baseclass._fields:
                    if n in converters:
                        lines.append('{}{} = {}({})'.format(
                            indent, n, converters[n].__name__, n))
                # Create the named tuple.
                lines.append('{}return {}({})'.format(
                    indent, baseclass.__name__, field_names))

            # The code is assembled from the arguments of this function
            # only; it does not contain data read from ELF files.
            exec('\n'.join(lines) + '\n', env)
            # The size must be computed for the same format that is used
            # for unpacking.  Without the byte order prefix, struct uses
            # native alignment and may count padding bytes.
            layouts[(elfclass, elfdata)] = _Layout(
                env[funcname], struct.calcsize(fmt))
    baseclass.layouts = layouts
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class Ident(collections.namedtuple('Ident',
    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
    """The identification bytes at the start of an ELF file."""

    def __new__(cls, *args):
        """Construct an object from a blob or its constituent fields."""
        if len(args) == 1:
            return cls.unpack(args[0])
        # super() always resolves to the named tuple base.  Going
        # through cls.__base__ would call this method again, without
        # end, for any class derived from Ident.
        return super().__new__(cls, *args)

    @staticmethod
    def unpack(blob: memoryview) -> 'Ident':
        """Parse raw data into a tuple.

        ei_class and ei_data are converted to ElfClass and ElfData;
        the other fields are kept as unpacked.
        """
        ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \
            ei_pad = struct.unpack('4s5B7s', blob)
        return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data),
                     ei_version, ei_osabi, ei_abiversion, ei_pad)

    # Number of bytes consumed by unpack.
    size = 16
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
Ehdr = collections.namedtuple('Ehdr', (
    'e_ident', 'e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff',
    'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize', 'e_phnum',
    'e_shentsize', 'e_shnum', 'e_shstrndx',
))
_define_layouts(
    Ehdr,
    layout32='16s2H5I6H',
    layout64='16s2HI3QI6H',
    types={
        'e_ident': Ident,
        'e_machine': Machine,
        'e_type': Et,
        'e_shstrndx': Shn,
    })
# Corresponds to Elf32_Phdr and Elf64_Phdr.  The field order follows the
# latter; the 32-bit variant stores p_flags later, hence fields32.
Phdr = collections.namedtuple('Phdr', (
    'p_type', 'p_flags', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz',
    'p_memsz', 'p_align',
))
_define_layouts(
    Phdr,
    layout32='8I',
    fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
              'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
    layout64='2I6Q',
    types={'p_type': Pt, 'p_flags': Pf})
# Corresponds to Elf32_Shdr and Elf64_Shdr.
class Shdr(collections.namedtuple('Shdr', (
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset', 'sh_size',
        'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize'))):
    """ELF section header."""

    def resolve(self, strtab: 'StringTable') -> 'Shdr':
        """Return a copy with sh_name looked up in the string table."""
        return self._replace(sh_name=strtab.get(self.sh_name))
_define_layouts(
    Shdr,
    layout32='10I',
    layout64='2I4Q2I2Q',
    types={'sh_type': Sht, 'sh_flags': Shf, 'sh_link': Shn})

# Corresponds to Elf32_Dyn and Elf64_Dyn.  The d_un union is flattened:
# only d_val is exposed, because d_ptr would have the same
# representation in Python.
Dyn = collections.namedtuple('Dyn', ('d_tag', 'd_val'))
_define_layouts(
    Dyn,
    layout32='2i',
    layout64='2q',
    types={'d_tag': Dt})
# Corresponds to Elf32_Sym and Elf64_Sym.
class Sym(collections.namedtuple('Sym', (
        'st_name', 'st_info', 'st_other', 'st_shndx', 'st_value',
        'st_size'))):
    """ELF symbol table entry."""

    def resolve(self, strtab: 'StringTable') -> 'Sym':
        """Return a copy with st_name looked up in the string table."""
        return self._replace(st_name=strtab.get(self.st_name))
# The 32-bit symbol layout stores st_value and st_size before st_info,
# hence the separate field order.
_define_layouts(
    Sym,
    layout32='3I2BH',
    layout64='I2BH2Q',
    fields32=('st_name', 'st_value', 'st_size', 'st_info',
              'st_other', 'st_shndx'),
    types={'st_shndx': Shn, 'st_info': StInfo})

# Corresponds to Elf32_Rel and Elf64_Rel.
Rel = collections.namedtuple('Rel', ('r_offset', 'r_info'))
_define_layouts(Rel, layout32='2I', layout64='2Q')

# Corresponds to Elf32_Rela and Elf64_Rela.
Rela = collections.namedtuple('Rela', ('r_offset', 'r_info', 'r_addend'))
_define_layouts(Rela, layout32='3I', layout64='3Q')
class StringTable:
    """ELF string table."""

    def __init__(self, blob):
        """Wrap the string table data.

        blob: a memoryview-like object
        """
        self.blob = blob

    def get(self, index) -> bytes:
        """Return the bytes from index up to the next null byte.

        The terminating null byte is not part of the result.
        """
        data = self.blob
        end = index
        # Scan forward to the terminator.
        while data[end] != 0:
            end += 1
        return bytes(data[index:end])
class Image:
    """ELF image parser.

    Attributes set by the constructor:
      image: the underlying data.
      Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela: the _Layout objects that
        match the class and data encoding of the image, or None if
        that combination has no layout.
      ehdr: the parsed ELF header, or None if there is no layout.
    """

    def __init__(self, image):
        """Create an ELF image from binary image data.

        image: a memoryview-like object that supports efficient range
          subscripting.
        """
        self.image = image
        ident = self.read(Ident, 0)
        classdata = (ident.ei_class, ident.ei_data)
        # Set self.Ehdr etc. to the subtypes with the right parsers.
        for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
            setattr(self, typ.__name__, typ.layouts.get(classdata, None))

        if self.Ehdr is not None:
            self.ehdr = self.read(self.Ehdr, 0)
            self._shdr_num = self._compute_shdr_num()
        else:
            self.ehdr = None
            self._shdr_num = 0

        # Caches for find_section and _find_stringtab, keyed by
        # section index.
        self._section = {}
        self._stringtab = {}

        if self._shdr_num > 0:
            self._shdr_strtab = self._find_shdr_strtab()
        else:
            self._shdr_strtab = None

    @staticmethod
    def readfile(path: str) -> 'Image':
        """Reads the ELF file at the specified path."""
        with open(path, 'rb') as inp:
            return Image(memoryview(inp.read()))

    def _compute_shdr_num(self) -> int:
        """Computes the actual number of section headers."""
        shnum = self.ehdr.e_shnum
        if shnum == 0:
            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
                # No section headers.
                return 0
            # Otherwise the extension mechanism is used (which may be
            # needed because e_shnum is just 16 bits).
            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
        return shnum

    def _find_shdr_strtab(self) -> 'StringTable':
        """Finds the section header string table (maybe via extensions)."""
        shstrndx = self.ehdr.e_shstrndx
        if shstrndx == Shn.SHN_XINDEX:
            shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
        return self._find_stringtab(shstrndx)

    def read(self, typ: type, offset: int):
        """Reads an object at a specific offset.

        typ must provide unpack and size, like the _Layout objects
        created by _define_layouts, or Ident.
        """
        return typ.unpack(self.image[offset: offset + typ.size])

    def phdrs(self) -> 'Phdr':
        """Generator iterating over the program headers.

        Raises ValueError if the entry size in the ELF header does not
        match the size of the Phdr layout.
        """
        if self.ehdr is None or self.ehdr.e_phnum == 0:
            # Without program headers there is nothing to iterate, and
            # e_phentsize is not meaningful (it may well be zero).
            return
        size = self.ehdr.e_phentsize
        if size != self.Phdr.size:
            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
                             .format(size, self.Phdr.size))

        offset = self.ehdr.e_phoff
        for _ in range(self.ehdr.e_phnum):
            yield self.read(self.Phdr, offset)
            offset += size

    def shdrs(self, resolve: bool=True) -> 'Shdr':
        """Generator iterating over the section headers.

        If resolve, section names are automatically translated
        using the section header string table.

        Raises ValueError if the entry size in the ELF header does not
        match the size of the Shdr layout.
        """
        if self._shdr_num == 0:
            return

        size = self.ehdr.e_shentsize
        if size != self.Shdr.size:
            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
                             .format(size, self.Shdr.size))

        offset = self.ehdr.e_shoff
        for _ in range(self._shdr_num):
            shdr = self.read(self.Shdr, offset)
            if resolve:
                shdr = shdr.resolve(self._shdr_strtab)
            yield shdr
            offset += size

    def dynamic(self) -> 'Dyn':
        """Generator iterating over the dynamic segment.

        Raises ValueError if the segment size is not a multiple of the
        size of a Dyn entry.
        """
        for phdr in self.phdrs():
            if phdr.p_type == Pt.PT_DYNAMIC:
                # Pick the first dynamic segment, like the loader.
                if phdr.p_filesz == 0:
                    # Probably separated debuginfo.
                    return
                offset = phdr.p_offset
                end = offset + phdr.p_memsz
                size = self.Dyn.size
                while True:
                    next_offset = offset + size
                    if next_offset > end:
                        raise ValueError(
                            'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
                                phdr.p_memsz, size))
                    yield self.read(self.Dyn, offset)
                    if next_offset == end:
                        return
                    offset = next_offset

    def syms(self, shdr: 'Shdr', resolve: bool=True) -> 'Sym':
        """A generator iterating over a symbol table.

        If resolve, symbol names are automatically translated using
        the string table for the symbol table.

        Raises ValueError if the entry size of the section is not the
        size of the Sym layout, or if the section size is not a
        multiple of it.
        """
        assert shdr.sh_type == Sht.SHT_SYMTAB
        size = shdr.sh_entsize
        if size != self.Sym.size:
            raise ValueError('Invalid symbol table entry size {}'.format(size))
        offset = shdr.sh_offset
        end = shdr.sh_offset + shdr.sh_size
        if resolve:
            strtab = self._find_stringtab(shdr.sh_link)
        while offset < end:
            sym = self.read(self.Sym, offset)
            if resolve:
                sym = sym.resolve(strtab)
            yield sym
            offset += size
        if offset != end:
            raise ValueError('Symbol table is not a multiple of entry size')

    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
        """Looks up a string in a string table identified by its link index."""
        try:
            strtab = self._stringtab[strtab_index]
        except KeyError:
            strtab = self._find_stringtab(strtab_index)
        return strtab.get(strtab_offset)

    def find_section(self, shndx: 'Shn') -> 'Shdr':
        """Returns the section header for the indexed section.

        The section name is not resolved.

        Raises ValueError for reserved section indices and for indices
        outside of the section header table.
        """
        try:
            return self._section[shndx]
        except KeyError:
            pass
        if shndx in Shn:
            raise ValueError('Reserved section index {}'.format(shndx))
        idx = shndx.value
        # Valid indices are 0 .. _shdr_num - 1.
        if idx < 0 or idx >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                idx, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
        self._section[shndx] = shdr
        return shdr

    def _find_stringtab(self, sh_link: int) -> 'StringTable':
        """Returns the string table stored in the section sh_link.

        String tables are cached by section index.  Raises ValueError
        if the index is out of range or the section is not a string
        table.
        """
        try:
            # Return the cached table itself, not the whole cache.
            return self._stringtab[sh_link]
        except KeyError:
            pass
        if sh_link < 0 or sh_link >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                sh_link, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
        if shdr.sh_type != Sht.SHT_STRTAB:
            raise ValueError(
                'Section {} is not a string table: {}'.format(
                    sh_link, shdr.sh_type))
        strtab = StringTable(
            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
        # This could retain essentially arbitrary amounts of data,
        # but caching string tables seems important for performance.
        self._stringtab[sh_link] = strtab
        return strtab
def elf_hash(s):
    """Return the 32-bit ELF hash of s.

    s may be a byte string or a str; elements that are not ints are
    converted with ord.
    """
    h = 0
    for item in s:
        code = item if type(item) is int else ord(item)
        h = ((h << 4) + code) & 0xffffffff
        # Fold the top four bits back into the value and clear them.
        high = h & 0xf0000000
        h ^= high >> 24
        h &= ~high
    return h
def gnu_hash(s):
    """Return the 32-bit GNU hash of s.

    s may be a byte string or a str; elements that are not ints are
    converted with ord.
    """
    acc = 5381
    for item in s:
        code = item if type(item) is int else ord(item)
        # acc * 33 + code, truncated to 32 bits.
        acc = ((acc << 5) + acc + code) & 0xffffffff
    return acc
# Export every module-level name that starts with an upper-case letter.
__all__ = [public for public in dir() if public[0].isupper()]
|