diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6d3433f --- /dev/null +++ b/Makefile @@ -0,0 +1,60 @@ +# Kernel Makefile +# try `make help` for more information + +# Default target +.DEFAULT_GOAL = all + +# Path to the files for the initial ramdisk (for Assignment 7) +INITRD_DIR ?= initrd/ +INITRD_TOOL ?= fs/tool/fstool +INITRD_DEP = +# 1MB free space +INITRD_FREE ?= 1048576 + +# Kernel source files +LINKER_SCRIPT = compiler/sections.ld +CRTI_SOURCE = compiler/crti.asm +CRTN_SOURCE = compiler/crtn.asm +CC_SOURCES = $(shell find * -name "*.cc" -a ! -name '.*' -a ! -path 'test*' -a ! -path 'fs/tool/*') +ASM_SOURCES = $(shell find * -name "*.asm" -a ! -name '.*') + +# Target files +KERNEL = $(BUILDDIR)/system +KERNEL64 = $(KERNEL)64 +ISOFILE = $(BUILDDIR)/stubs.iso + +# Include global variables and standard recipes +include tools/common.mk + +# Initial Ramdisk +ifneq ($(wildcard $(INITRD_DIR)*),) +INITRD = $(BUILDDIR)/initrd.img +INITRD_DEP += $(shell find $(INITRD_DIR) -type f ) +# Additional dependency for kernel +$(KERNEL): $(INITRD) +endif + +all: $(KERNEL) + +# Linking the system image +# We use the C++ compiler (which calls the actual linker) +$(KERNEL64): $(CRTI_OBJECT) $(CRTN_OBJECT) $(ASM_OBJECTS) $(CC_OBJECTS) $(LINKER_SCRIPT) $(MAKEFILE_LIST) + @echo "LD $@" + @mkdir -p $(@D) + $(VERBOSE) $(CXX) $(CXXFLAGS) -Wl,-T $(LINKER_SCRIPT) -o $@ $(CRTI_OBJECT) $(CRTBEGIN_OBJECT) $(ASM_OBJECTS) $(CC_OBJECTS) $(LDFLAGS) $(LIBGCC) $(CRTEND_OBJECT) $(CRTN_OBJECT) + +# The kernel must be a 32bit elf for multiboot compliance +$(KERNEL): $(KERNEL64) + $(VERBOSE) $(OBJCOPY) -I elf64-x86-64 -O elf32-i386 $< $@ + +# Tool for editing a Minix v3 file system image (Assignment 7) +$(INITRD_TOOL): $(shell test -d $(dir $(INITRD_TOOL)) && find $(dir $(INITRD_TOOL)) -name "*.cc" -or -name '*.h') + @echo "Make $@" + $(VERBOSE) MAKEFLAGS="" $(MAKE) -C $(dir $(INITRD_TOOL)) + +# Initial Ramdisk with Minix v3 file system +$(INITRD): $(INITRD_TOOL) $(INITRD_DEP) + @echo "INITRD $@" + $(VERBOSE) dd if=/dev/zero of=$@ bs=$(shell du -s $(INITRD_DIR) | cut -f1 | xargs expr $(INITRD_FREE) + ) count=1 + $(VERBOSE) /sbin/mkfs.minix -3 $@ # optional --inodes + $(VERBOSE) ./$(INITRD_TOOL) put "$(INITRD_DIR)" $@ diff --git a/boot/longmode.asm b/boot/longmode.asm new file mode 100644 index 0000000..753eb7e --- /dev/null +++ b/boot/longmode.asm @@ -0,0 +1,245 @@ +; The stony path to Long Mode (64-bit)... +; ... begins in 32-bit Protected Mode +[BITS 32] + +; Pointer to Long Mode Global Descriptor Table (GDT, machine/gdt.cc) +[EXTERN gdt_long_mode_pointer] + +[GLOBAL long_mode] +long_mode: + +; You can check if the CPU supports Long Mode by using the `cpuid` command. +; Problem: You first have to figure out if the `cpuid` command itself is +; supported. Therefore, you have to try to reverse the 21st bit in the EFLAGS +; register -- if it works, then there is the 'cpuid' instruction. +CPUID_BIT_MASK equ 1 << 21 + +check_cpuid: + ; Save EFLAGS on stack + pushfd + + ; Copy stored EFLAGS from stack to EAX register + mov eax, [esp] + + ; Flip the 21st bit (ID) in EAX + xor eax, CPUID_BIT_MASK + + ; Copy EAX to EFLAGS (using the stack) + push eax + popfd + + ; And reverse: copy EFLAGS to EAX (using the stack) + ; (but the 21st bit should now still be flipped, if `cpuid` is supported) + pushfd + pop eax + + ; Compare the new EFLAGS copy (residing in EAX) with the EFLAGS stored at + ; the beginning of this function by using an exclusive OR -- all different + ; (flipped) bits will be stored in EAX. 
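+	; (The original EFLAGS value pushed by the first `pushfd` is still available at [esp],
+	;  which is also what the `popfd` below restores afterwards.)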
+ xor eax, [esp] + + ; Restore original EFLAGS + popfd + + ; If 21st Bit in EAX is set, `cpuid` is supported -- continue at check_long_mode + and eax, CPUID_BIT_MASK + jnz check_long_mode + + ; Show error message "No CPUID" and stop CPU + mov dword [0xb8000], 0xcf6fcf4e + mov dword [0xb8004], 0xcf43cf20 + mov dword [0xb8008], 0xcf55cf50 + mov dword [0xb800c], 0xcf44cf49 + hlt + +; Now you are able to use the `cpuid` instruction to check if Long Mode is +; available -- after you've checked if the `cpuid` is able to perform the +; check itself (since it is an extended `cpuid` function)... + +CPUID_GET_LARGEST_EXTENDED_FUNCTION_NUMBER equ 0x80000000 +CPUID_GET_EXTENDED_PROCESSOR_FEATURES equ 0x80000001 +CPUID_HAS_LONGMODE equ 1 << 29 + +check_long_mode: + ; Set argument for `cpuid` to check the availability of extended functions + ; and call cpuid + mov eax, CPUID_GET_LARGEST_EXTENDED_FUNCTION_NUMBER + cpuid + ; The return value contains the maximum function number supported by `cpuid`, + ; You'll need the function number for extended processor features + cmp eax, CPUID_GET_EXTENDED_PROCESSOR_FEATURES + ; If not present, the CPU is definitely too old to support long mode + jb no_long_mode + + ; Finally, you are able to check the Long Mode support itself + mov eax, CPUID_GET_EXTENDED_PROCESSOR_FEATURES + cpuid + ; If the return value in the EDX register has set the 29th bit, + ; then long mode is supported -- continue with setup_paging + test edx, CPUID_HAS_LONGMODE + jnz setup_paging + +no_long_mode: + ; Show error message "No 64bit" and stop CPU + mov dword [0xb8000], 0xcf6fcf4e + mov dword [0xb8004], 0xcf36cf20 + mov dword [0xb8008], 0xcf62cf34 + mov dword [0xb800c], 0xcf74cf69 + hlt + +; Paging is required for Long Mode. +; Since an extensive page manager might be a bit of an overkill to start with, +; the following code creates an identity mapping for the first four gigabytes +; (using huge pages): each virtual address will point to the same physical one. +; This area (up to 4 GiB) is important for some memory mapped devices (APIC) +; and you don't want to remap them yet for simplicity reasons. +; In the advanced operating systems lecture, this topic is covered in detail, +; however, if you want a quick overview, have a look at +; https://wiki.osdev.org/Page_Tables#2_MiB_pages_2 + +PAGE_SIZE equ 4096 +PAGE_FLAGS_PRESENT equ 1 << 0 +PAGE_FLAGS_WRITEABLE equ 1 << 1 +PAGE_FLAGS_USER equ 1 << 2 +PAGE_FLAGS_HUGE equ 1 << 7 + +setup_paging: + ; Unlike in Protected Mode, an entry in the page table has a size of 8 bytes + ; (vs 4 bytes), so there are only 512 (and not 1024) entries per table. + ; Structure of the 3-level PAE paging: One entry in the + ; - lv2: Page-Directory-Table (PDT) covers 2 MiB (1 Huge Page) + ; - lv3: Page-Directory-Pointer-Table (PDPT) covers 1 GiB (512 * 2 MiB) + ; - lv4: Page-Map-Level-4-Table (PML4) covers 512 GiB (512 * 1 GiB) + + ; To address 4 GiB only four level-2 tables are required. + ; All entries of the level-2 tables should be marked as writeable (attributes) + ; and map (point to) the corresponding physical memory. + + ; This is done in a loop using ECX as counter + mov ecx, 0 + +.identitymap_level2: + ; Calculate physical address in EAX (2 MiB multiplied by the counter) + mov eax, 0x200000 + mul ecx + ; Configure page attributes + or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_HUGE | PAGE_FLAGS_USER + ; Write (8 byte) entry in the level-2 table + mov [paging_level2_tables + ecx * 8], eax + + ; Increment counter... + inc ecx + ; ... 
until all four level-2 tables are filled + cmp ecx, 512 * 4 + jne .identitymap_level2 + + ; The first four entries of the level-3 table should point to the + ; four level-2 tables (and be writeable as well). + ; Again, ECX acts as counter for the loop + mov ecx, 0 + +.identitymap_level3: + ; Calculate the address: ECX * PAGE_SIZE + paging_level2_tables + mov eax, ecx + ; The size of a page is stored in the EDX register + mov edx, PAGE_SIZE + mul edx + add eax, paging_level2_tables + ; Configure attributes + or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_USER + ; Write (8 byte) entry in the level-3 table + mov [paging_level3_table + ecx * 8], eax + + ; Increment counter... + inc ecx + ; ... until all four entries of the table are written + cmp ecx, 4 + jne .identitymap_level3 + + mov eax, paging_level2_tables + or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_USER + mov [paging_level3_table], eax + + ; The first entry of the level-4 table should point to to the level-3 table + mov eax, paging_level3_table + or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_USER + mov [paging_level4_table], eax + +; Time to activate paging +paging_enable: + ; First setup the control registers + + ; Write the address of the level-4 table into the CR3 register + mov eax, paging_level4_table + mov cr3, eax + + ; Activate Physical Address Extension (PAE) + ; by setting the 5th bits in the CR4 register + mov eax, cr4 + or eax, 1 << 5 + mov cr4, eax + + ; Set the Long Mode Enable Bit in the EFER MSR + ; (Extended Feature Enable Register Model Specific Register) + mov ecx, 0xC0000080 + rdmsr + or eax, 1 << 8 + wrmsr + + ; Finally, the 31st bit in CR0 is set to enable Paging + mov eax, cr0 + or eax, 1 << 31 + mov cr0, eax + + ; Load Long Mode Global Descriptor Table + lgdt [gdt_long_mode_pointer] + + ; Far jump to the 64-bit start code + jmp 0x8:long_mode_start + + ; print `KO` to screen + mov dword [0xb8000], 0x3f4f3f4b + hlt + +; Memory reserved for page tables +[SECTION .bss] + +ALIGN 4096 + +[GLOBAL paging_level4_table] +[GLOBAL paging_level3_table] +[GLOBAL paging_level2_tables] +; 1x Level-4 Table (Page Map Level 4) +paging_level4_table: + resb PAGE_SIZE + +; 1x Level-3 Table (Page Directory Pointer Table) +paging_level3_table: + resb PAGE_SIZE + +; 4x Level-2 Table (Page Directory) +paging_level2_tables: + resb PAGE_SIZE * 4 + +[SECTION .text] +[EXTERN kernel_init] ; C++ entry function + +; Continue with 64 bit code +[BITS 64] + +long_mode_start: + ; Set data segment registers to SEGMENT_KERNEL_DATA (machine/gdt.h) + mov ax, 0x10 + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + + ; Call high-level (C++) kernel initialization function + call kernel_init + + ; Print `STOP` to screen and stop + mov rax, 0x2f502f4f2f544f53 + mov qword [0xb8000], rax + hlt diff --git a/boot/multiboot/config.inc b/boot/multiboot/config.inc new file mode 100644 index 0000000..1e764f3 --- /dev/null +++ b/boot/multiboot/config.inc @@ -0,0 +1,22 @@ +; Magic Header, has to be present in Kernel to indicate Multiboot compliance +MULTIBOOT_HEADER_MAGIC_OS equ 0x1badb002 + +; Answer by the boot loader for Multiboot compliance, written in eax register +MULTIBOOT_HEADER_MAGIC_LOADER equ 0x2badb002 + +; Flags instructing the Multiboot compliant boot loader to setup the system +; according to your needs +MULTIBOOT_PAGE_ALIGN equ 1<<0 ; Align boot modules (initrds) at 4 KiB border +MULTIBOOT_MEMORY_INFO equ 1<<1 ; Request Memory Map information +MULTIBOOT_VIDEO_MODE equ 1<<2 ; 
Configure video mode + +MULTIBOOT_HEADER_FLAGS equ 0 + +; Desired video mode (only considered if MULTIBOOT_VIDEO_MODE set) +; (boot loader will choose the best fitting mode, which might differ from the settings below) +MULTIBOOT_VIDEO_WIDTH equ 1280 ; Desired width +MULTIBOOT_VIDEO_HEIGHT equ 1024 ; Desired height +MULTIBOOT_VIDEO_BITDEPTH equ 32 ; Desired bit depth + +; Checksum +MULTIBOOT_HEADER_CHKSUM equ -(MULTIBOOT_HEADER_MAGIC_OS + MULTIBOOT_HEADER_FLAGS) diff --git a/boot/multiboot/data.cc b/boot/multiboot/data.cc new file mode 100644 index 0000000..2cf0315 --- /dev/null +++ b/boot/multiboot/data.cc @@ -0,0 +1,165 @@ +#include "boot/multiboot/data.h" + +/*! \brief Multiboot Information Structure according to Specification + * \see [Multiboot Specification]{#multiboot} + */ +struct multiboot_info { + /*! \brief Helper Structure + */ + struct Array { + uint32_t size; ///< Length + uint32_t addr; ///< Begin (physical address) + } __attribute__((packed)); + + enum Flag : uint32_t { + Memory = 1U << 0, ///< is there basic lower/upper memory information? + BootDev = 1U << 1, ///< is there a boot device set? + CmdLine = 1U << 2, ///< is the command-line defined? + Modules = 1U << 3, ///< are there modules to do something with? + /* These next two are mutually exclusive */ + SymbolTable = 1U << 4, ///< is there an a.out symbol table loaded? + SectionHeader = 1U << 5, ///< is there an ELF section header table? + + MemoryMap = 1U << 6, ///< is there a full memory map? + DriveInfo = 1U << 7, ///< Is there drive info? + ConfigTable = 1U << 8, ///< Is there a config table? + BootLoaderName = 1U << 9, ///< Is there a boot loader name? + ApmTable = 1U << 10, ///< Is there a APM table? + + // Is there video information? + VbeInfo = 1U << 11, ///< Vesa bios extension + FramebufferInfo = 1U << 12 ///< Framebuffer + } flags; + + /*! \brief Available memory retrieved from BIOS + */ + struct { + uint32_t lower; ///< Amount of memory below 1 MiB in kilobytes + uint32_t upper; ///< Amount of memory above 1 MiB in kilobytes + } mem __attribute__((packed)); + uint32_t boot_device; ///< "root" partition + uint32_t cmdline; ///< Kernel command line + Array mods; ///< List of boot modules + union { + /*! \brief Symbol table for kernel in a.out format + */ + struct { + uint32_t tabsize; + uint32_t strsize; + uint32_t addr; + uint32_t reserved; + } aout_symbol_table __attribute__((packed)); + + /*! \brief Section header table for kernel in ELF + */ + struct { + uint32_t num; ///< Number of entries + uint32_t size; ///< Size per entry + uint32_t addr; ///< Start of the header table + uint32_t shndx; ///< String table index + } elf_section_header_table __attribute__((packed)); + }; + + struct Array mmap; ///< Memory Map + struct Array drives; ///< Drive Information + uint32_t config_table; ///< ROM configuration table + uint32_t boot_loader_name; ///< Boot Loader Name + uint32_t apm_table; ///< APM table + + struct Multiboot::VBE vbe; ///< VBE Information + struct Multiboot::Framebuffer framebuffer; ///< Framebuffer information + + /*! \brief Check if setting is available + * \param flag Flag to check + * \return `true` if available + */ + bool has(enum Flag flag) const { + return (flags & flag) != 0; + } +} __attribute__((packed)); +assert_size(multiboot_info, 116); + +/*! 
\brief The pointer to the multiboot structures will be assigned in the assembler startup code (boot/startup.asm)
+ */
+struct multiboot_info *multiboot_addr = nullptr;
+
+namespace Multiboot {
+Module * getModule(unsigned i) {
+	if (multiboot_addr != nullptr &&
+	    multiboot_addr->has(multiboot_info::Flag::Modules) &&
+	    i < multiboot_addr->mods.size) {
+		return i + reinterpret_cast<Module *>(static_cast<uintptr_t>(multiboot_addr->mods.addr));
+	} else {
+		return nullptr;
+	}
+}
+
+unsigned getModuleCount() {
+	return multiboot_addr->mods.size;
+}
+
+void * Memory::getStartAddress() const {
+	if (sizeof(void*) == 4 && (addr >> 32) != 0) {
+		return reinterpret_cast<void *>(addr & 0xffffffff);
+	} else {
+		return reinterpret_cast<void *>(static_cast<uintptr_t>(addr));
+	}
+}
+
+void * Memory::getEndAddress() const {
+	uint64_t end = addr + len;
+	if (sizeof(void*) == 4 && (end >> 32) != 0) {
+		return reinterpret_cast<void *>(end & 0xffffffff);
+	} else {
+		return reinterpret_cast<void *>(static_cast<uintptr_t>(end));
+	}
+}
+
+bool Memory::isAvailable() const {
+	return type == AVAILABLE;
+}
+
+Memory * Memory::getNext() const {
+	if (multiboot_addr != nullptr && multiboot_addr->has(multiboot_info::Flag::MemoryMap)) {
+		uintptr_t next = reinterpret_cast<uintptr_t>(this) + size + sizeof(size);
+		if (next < multiboot_addr->mmap.addr + multiboot_addr->mmap.size) {
+			return reinterpret_cast<Memory *>(next);
+		}
+	}
+	return nullptr;
+}
+
+Memory * getMemoryMap() {
+	if (multiboot_addr != nullptr &&
+	    multiboot_addr->has(multiboot_info::Flag::MemoryMap) &&
+	    multiboot_addr->mmap.size > 0) {
+		return reinterpret_cast<Memory *>(static_cast<uintptr_t>(multiboot_addr->mmap.addr));
+	} else {
+		return nullptr;
+	}
+}
+
+char * getCommandLine() {
+	return reinterpret_cast<char *>(static_cast<uintptr_t>(multiboot_addr->cmdline));
+}
+
+char * getBootLoader() {
+	return reinterpret_cast<char *>(static_cast<uintptr_t>(multiboot_addr->boot_loader_name));
+}
+
+VBE * getVesaBiosExtensionInfo() {
+	if (multiboot_addr != nullptr && multiboot_addr->has(multiboot_info::Flag::VbeInfo)) {
+		return &(multiboot_addr->vbe);
+	} else {
+		return nullptr;
+	}
+}
+
+Framebuffer * getFramebufferInfo() {
+	if (multiboot_addr != nullptr && multiboot_addr->has(multiboot_info::Flag::FramebufferInfo)) {
+		return &(multiboot_addr->framebuffer);
+	} else {
+		return nullptr;
+	}
+}
+}  // namespace Multiboot
diff --git a/boot/multiboot/data.h b/boot/multiboot/data.h
new file mode 100644
index 0000000..ad27563
--- /dev/null
+++ b/boot/multiboot/data.h
@@ -0,0 +1,232 @@
+/*! \file
+ * \brief \ref Multiboot Interface
+ */
+#pragma once
+
+#include "types.h"
+#include "compiler/fix.h"
+#include "debug/assert.h"
+
+/*! \brief Interface for Multiboot
+ *
+ * Due to historical reasons, a normal BIOS allows you to do quite an egg dance
+ * until you finally reach the actual kernel (especially with only 512 bytes
+ * available in the master boot record...).
+ * Fortunately, there are [boot loaders](https://wiki.osdev.org/Bootloader) that
+ * (partly) do this ungrateful job for you:
+ * They load your kernel into memory, switch (the bootstrap processor) to
+ * protected mode (32 bit) and jump to the entry point of your kernel -- saving
+ * you a lot of boring (or enlightening?) work: reading ancient systems documentation.
+ * One of the most famous representatives is the
+ * [Grand Unified Bootloader (GRUB)](https://www.gnu.org/software/grub/), which
+ * is also the reference implementation of the \ref multiboot "Multiboot Specification".
+ *
+ * A Multiboot compliant boot loader will prepare the system according to your
+ * needs and can hand you a lot of useful information (e.g. references to
+ * initial ramdisks).
+ *
+ * However, you have to inform the loader that you are also compliant with the
+ * specification, and (if required) instruct the loader to adjust specific
+ * settings (e.g. the graphics mode).
+ *
+ * For this purpose you have to configure the beginning of the kernel (the first
+ * 8192 bytes of the kernel binary) accordingly (see `compiler/sections.ld`) --
+ * this is where the boot loader will search for a magic header and parse the
+ * subsequent entries containing the desired system configuration.
+ * In StuBS these flags are set in `boot/multiboot/config.inc` and the header
+ * structure is generated in `boot/multiboot/header.asm`.
+ *
+ * The first step in your \ref startup_bsp() "kernel entry function" is saving
+ * the pointer to the struct with the information from the boot loader
+ * (transferred via register `ebx`) -- and \ref Multiboot provides you the
+ * interface to comfortably access its contents!
+ */
+namespace Multiboot {
+/*! \brief Boot Module
+ * (also known as `initrd` = initial Ramdisk)
+ *
+ * \see \ref multiboot-boot-modules "1.7 Boot modules"
+ * \see \ref multiboot-boot-info "3.3 Boot information format"
+ */
+class Module {
+	uint32_t start;    ///< Start address
+	uint32_t end;      ///< End address (excluded)
+	uint32_t cmdline;  ///< Command line parameter
+	uint32_t pad UNUSED_STRUCT_FIELD;  ///< alignment; must be 0
+
+ public:
+	/*! \brief Get start of this boot module
+	 * \return Pointer to begin of the module's physical address
+	 */
+	void * getStartAddress() const {
+		return reinterpret_cast<void *>(static_cast<uintptr_t>(start));
+	}
+
+	/*! \brief Get end of this boot module
+	 * \return Pointer beyond the module's physical address
+	 */
+	void * getEndAddress() const {
+		return reinterpret_cast<void *>(static_cast<uintptr_t>(end));
+	}
+
+	/*! \brief Get the size of this boot module
+	 * \return Module size in bytes (difference of end and start address)
+	 */
+	size_t getSize() const {
+		return static_cast<size_t>(end - start);
+	}
+
+	/*! \brief Get the command line for this module
+	 * \return pointer to zero terminated string
+	 */
+	char * getCommandLine() const {
+		return reinterpret_cast<char *>(static_cast<uintptr_t>(cmdline));
+	}
+} __attribute__((packed));
+assert_size(Module, 16);
+
+/*! \brief Retrieve a certain boot module
+ * \param i boot module number
+ * \return Pointer to structure with boot module information
+ */
+Module * getModule(unsigned i);
+
+/*! \brief Get the number of modules
+ * \return Number of loaded boot modules
+ */
+unsigned getModuleCount();
+
+/*! \brief Get the kernel command line
+ * \return pointer to zero terminated string
+ */
+char * getCommandLine();
+
+/*! \brief Get the name of the boot loader
+ * \return pointer to zero terminated string
+ */
+char * getBootLoader();
+
+/*! \brief Memory Map
+ *
+ * The boot loader queries the BIOS for a memory map and stores its result in
+ * (something like) a linked list. However, this list may not be complete,
+ * can have contradictory entries and does not take the location of your kernel
+ * or any boot modules into account.
+ * (Anyway, it is still the best memory map you will have in StuBS...)
+ *
+ * \note Needs to be enabled explicitly by setting the `MULTIBOOT_MEMORY_INFO` flag
+ *       in the multiboot header (see `boot/multiboot/config.inc`)!
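+ *
+ * A minimal usage sketch (assuming the accessor functions declared in this file
+ * behave as documented):
+ * \code{.cpp}
+ * // iterate over all entries of the Multiboot memory map
+ * for (Multiboot::Memory * area = Multiboot::getMemoryMap(); area != nullptr; area = area->getNext()) {
+ *     if (area->isAvailable()) {
+ *         // area->getStartAddress() up to area->getEndAddress() is usable RAM
+ *     }
+ * }
+ * \endcode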
+ * + * \see [Detecting Memory](https://wiki.osdev.org/Detecting_Memory_(x86)) + */ +class Memory { + uint32_t size; ///< Size of this entry (can exceed size of the class, rest will be padding bits) + uint64_t addr; ///< Begin of memory area + uint64_t len; ///< length of the memory area + + /*! \brief Usage Type + */ + enum Type : uint32_t { + AVAILABLE = 1, ///< Memory is available and usable in kernel + RESERVED = 2, ///< Memory is reserved (without further explanation) + ACPI = 3, ///< Memory may be reclaimed by ACPI + NVS = 4, ///< Memory is non volatile storage for ACPI + BADRAM = 5 ///< Area contains bad memory + } type; + + public: + /*! \brief Get start of this memory area + * \return Pointer to begin of the physical address of the memory area + */ + void * getStartAddress() const; + + /*! \brief Get end of this memory area + * \return Pointer beyond the physical address of this memory area + */ + void * getEndAddress() const; + + /*! \brief Is the memory marked as usable + * \return `true` if available, `false` if not usable. + */ + bool isAvailable() const; + + /*! \brief Get the next memory area + * \return pointer to the next memory area entry or `nullptr` if last area + */ + Memory * getNext() const; +} __attribute__((packed)); +assert_size(Memory, 24); + +/*! \brief Retrieve the first entry of the memory map + */ +Memory * getMemoryMap(); + +/*! \brief Video mode: Vesa BIOS Extension + * + * \see [VESA BIOS Extension (VBE) Core Functions (Version 3)](vbe3.pdf) + */ +struct VBE { + uint32_t control_info; ///< Pointer to VBE control information + uint32_t mode_info; ///< Pointer to VBE mode information + uint16_t mode; ///< Selected video mode (as defined in the standard) + uint16_t interface_seg; ///< Protected mode interface (unused) + uint16_t interface_off; ///< Protected mode interface (unused) + uint16_t interface_len; ///< Protected mode interface (unused) +} __attribute__((packed)); +assert_size(VBE, 16); + +/*! \brief Get pointer to Vesa BIOS Extension information + * + * \note Only available if the `MULTIBOOT_VIDEO_MODE` flag was explicitly set + * in the multiboot header (see `boot/multiboot/config.inc`)! + */ +VBE * getVesaBiosExtensionInfo(); + +/*! \brief Video mode: Framebuffer + * + * This beautiful structure contains everything required for using the graphic + * framebuffer in a very handy manner -- however, it may not be well supported + * by current boot loaders... + * These information can be retrieved from \ref VBE as well, though you then + * have to parse these huge structures containing a lot of useless stuff. + */ +struct Framebuffer { + uint64_t address; ///< Physical address of the framebuffer + uint32_t pitch; ///< Number of bytes per row + uint32_t width; ///< Width of framebuffer + uint32_t height; ///< Height of framebuffer + uint8_t bpp; ///< Bits per pixel + enum Type : uint8_t { + INDEXED = 0, ///< Using a custom color palette + RGB = 1, ///< Standard red-green-blue + EGA_TEXT = 2 ///< Enhanced Graphics Adapter color palette + } type; + union { + /*! \brief For INDEXED type + */ + struct { + uint32_t palette_addr; ///< Address of an array with RGB values + uint16_t palette_num_colors; ///< Number of colors (in array above) + } __attribute__((packed)); + + /*! 
\brief For RGB type + */ + struct { + uint8_t offset_red; ///< Offset of red value + uint8_t bits_red; ///< Bits used in red value + uint8_t offset_green; ///< Offset of green value + uint8_t bits_green; ///< Bits used in green value + uint8_t offset_blue; ///< Offset of blue value + uint8_t bits_blue; ///< Bits used in blue value + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); +assert_size(Framebuffer, 28); + +/*! \brief Get pointer to framebuffer information + * + * \note Only available if the `MULTIBOOT_VIDEO_MODE` flag was explicitly set + * in the multiboot header (see `boot/multiboot/config.inc`)! + */ +Framebuffer * getFramebufferInfo(); +} // namespace Multiboot diff --git a/boot/multiboot/header.asm b/boot/multiboot/header.asm new file mode 100644 index 0000000..1d4e827 --- /dev/null +++ b/boot/multiboot/header.asm @@ -0,0 +1,33 @@ +; The first 8192 bytes of the kernel binary must contain a header with +; predefined (and sometimes "magic") values according to the Multiboot standard. +; Based on these values, the boot loader decides whether and how to load the +; kernel -- which is compiled and linked into an ELF file. +; To make this possible with your StuBS kernel, the linker places the following +; entry `multiboot_header` at the very beginning of the file thanks to the +; linker script (located in compiler/sections.ld). + +[SECTION .multiboot_header] + +; Include configuration +%include 'boot/multiboot/config.inc' + +; Multiboot Header +ALIGN 4 +multiboot_header: + dd MULTIBOOT_HEADER_MAGIC_OS ; Magic Header Value + dd MULTIBOOT_HEADER_FLAGS ; Flags (affects following entries) + dd MULTIBOOT_HEADER_CHKSUM ; Header Checksum + + ; Following fields would have been required to be defined + ; if flag A_OUT KLUDGE was set (but we don't need this) + dd 0 ; Header address + dd 0 ; Begin of load address + dd 0 ; end of load address + dd 0 ; end of bss segment + dd 0 ; address of entry function + + ; Following fields are required for video mode (flag MULTIBOOT_VIDEO_MODE) + dd 0 ; Mode: 0 = Graphic / 1 = Text + dd MULTIBOOT_VIDEO_WIDTH ; Width (pixels / columns) + dd MULTIBOOT_VIDEO_HEIGHT ; Height (pixels / rows) + dd MULTIBOOT_VIDEO_BITDEPTH ; color depth / number of colors diff --git a/boot/startup.asm b/boot/startup.asm new file mode 100644 index 0000000..d7ea7c7 --- /dev/null +++ b/boot/startup.asm @@ -0,0 +1,77 @@ +; This is the actual entry point of the kernel. +; The switch into the 32-bit 'Protected Mode' has already been performed +; (by the boot loader). +; The assembly code just performs the absolute necessary steps (like setting up +; the stack) to be able to jump into the C++ code -- and continue further +; initialization in a (more) high-level language. 
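+; Rough control flow for the bootstrap processor:
+;   boot loader -> startup_bsp -> load_cs -> long_mode (boot/longmode.asm)
+;   -> long_mode_start (64 bit) -> kernel_init() (boot/startup.cc) -> main()
+; The application processors enter via setup_ap (boot/startup_ap.asm) and join at segment_init.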
+ +[BITS 32] + +; External functions and variables +[EXTERN CPU_CORE_STACK_SIZE] ; Constant containing the initial stack size (per CPU core), see `machine/core.cc` +[EXTERN cpu_core_stack_pointer] ; Pointer to reserved memory for CPU core stacks, see `machine/core.cc` +[EXTERN gdt_protected_mode_pointer] ; Pointer to 32 Bit Global Descriptor Table (located in `machine/gdt.cc`) +[EXTERN long_mode] ; Low level function to jump into the 64-bit mode ('Long Mode', see `boot/longmode.asm`) +[EXTERN multiboot_addr] ; Variable, in which the Pointer to Multiboot information + ; structure should be stored (`boot/multiboot/data.cc`) + +; Load Multiboot settings +%include "boot/multiboot/config.inc" + +[SECTION .text] + +; Entry point for the bootstrap processor (CPU0) +[GLOBAL startup_bsp] +startup_bsp: + ; Check if kernel was booted by a Multiboot compliant boot loader + cmp eax, MULTIBOOT_HEADER_MAGIC_LOADER + jne skip_multiboot + ; Pointer to Multiboot information structure has been stored in ebx by the + ; boot loader -- copy to a variable for later usage. + mov [multiboot_addr], ebx + +skip_multiboot: + ; Disable interrupts + cli + ; Disable non maskable interrupts (NMI) + ; (we are going to ignore them) + mov al, 0x80 + out 0x70, al + + jmp load_cs + +; Segment initialization +; (code used by bootstrap and application processors as well) +[GLOBAL segment_init] +segment_init: + ; Load temporary protected mode Global Descriptor Table (GDT) + lgdt [gdt_protected_mode_pointer] + + ; Initialize segment register + mov ax, 0x10 + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + ; Load code segment register + jmp 0x8:load_cs + +load_cs: + ; Initialize stack pointer: + ; Atomic increment of `cpu_core_stack_pointer` by `CPU_CORE_STACK_SIZE` + ; (to avoid race conditions at application processor boot) + mov eax, [CPU_CORE_STACK_SIZE] + lock xadd [cpu_core_stack_pointer], eax + ; Since the stack grows into the opposite direction, + ; Add `CPU_CORE_STACK_SIZE` again + add eax, [CPU_CORE_STACK_SIZE] + ; Assign stack pointer + mov esp, eax + + ; Clear direction flag for string operations + cld + + ; Switch to long mode (64 bit) + jmp long_mode diff --git a/boot/startup.cc b/boot/startup.cc new file mode 100644 index 0000000..65af964 --- /dev/null +++ b/boot/startup.cc @@ -0,0 +1,86 @@ +#include "startup.h" + +#include "types.h" +#include "compiler/libc.h" +#include "debug/output.h" +#include "debug/kernelpanic.h" +#include "interrupt/handler.h" +#include "machine/acpi.h" +#include "machine/apic.h" +#include "machine/core.h" +#include "machine/idt.h" +#include "machine/pic.h" + +/*! \brief The first processor is the Bootstrap Processor (BSP) + */ +static bool is_bootstrap_processor = true; + +extern "C" [[noreturn]] void kernel_init() { + if (is_bootstrap_processor) { + is_bootstrap_processor = false; + /* Setup and load Interrupt Descriptor Table (IDT) + * + * On the first call to \ref kernel_init(), we have to assign the + * addresses of the entry functions for each interrupt + * (called 'interrupt_entry_VECTOR', defined in `interrupt/handler.asm`) + * to the IDT. These entry functions save the context and call the C++ + * \ref interrupt_handler(). As the \ref IDT is used by all CPUs, + * it is sufficient to do this initialization on only the BSP (first core). 
+ */ + for (int i = 0; i < Core::Interrupt::VECTORS ; i++) { + IDT::handle(i, interrupt_entry[i]); + } + IDT::load(); + + // Initialize PICs + PIC::initialize(); + + // Call global constructors + CSU::initializer(); + + // Initialize ACPI + if (!ACPI::init()) { + DBG_VERBOSE << "No ACPI!"; + Core::die(); + } + // Initialize APIC (using ACPI) + if (!APIC::init()) { + DBG_VERBOSE << "APIC Initialization failed"; + Core::die(); + } + + // Initialize the Bootstrap Processor + Core::init(); + + // Go to main function + main(); + + // Exit CPU + DBG_VERBOSE << "CPU core " << Core::getID() << " (BSP) shutdown." << endl; + Core::exit(); + } else { + // Load Interrupt Descriptor Table (IDT) + IDT::load(); + + // Initialize this application processor + Core::init(); + + // And call the AP main + main_ap(); + + // Exit CPU + DBG_VERBOSE << "CPU core " << Core::getID() << " (AP) shutdown." << endl; + Core::exit(); + } + + // Only on last core + if (Core::countOnline() == 1) { + // Call global destructors + CSU::finalizer(); + } + + // wait forever + while (true) { + Core::die(); + } +} diff --git a/boot/startup.h b/boot/startup.h new file mode 100644 index 0000000..dc443e8 --- /dev/null +++ b/boot/startup.h @@ -0,0 +1,46 @@ +/*! \file + * \brief Startup of the first core, also known as bootstrap processor (BSP) + */ +#pragma once + +#include "compiler/fix.h" + +/*! \brief Entry point of your kernel + * + * \ingroup Startup + * + * Executed by boot loader. + * Stores Pointer to \ref Multiboot information structure, + * initializes stack pointer, + * switches to long mode + * and finally calls the C++ \ref kernel_init function + */ +extern "C" void startup_bsp() +ERROR_ON_CALL("The kernel entry point shall never be called from your code!"); + +/*! \brief Initializes the C++ environment and detects system components + * + * \ingroup Startup + * + * The startup code(both for \ref startup_bsp "bootstrap" and \ref startup_ap "application processor") + * jumps to this high level function. After initialization it will call \ref main() + *or \ref main_ap() respectively + */ +extern "C" [[noreturn]] void kernel_init() +ERROR_ON_CALL("The kernel init function shall never be called from your code!"); + +/*! \brief Kernels main function + * + * Called after initialization of the system by \ref kernel_init() + * + * \note This code will only be executed on the booting CPU (i.e., the one with ID 0). + */ +extern "C" int main(); + +/*! \brief Entry point for application processors + * + * Called after initialization of the system by \ref kernel_init() + * + * \note Code in this function will be executed on all APs (i.e., all CPUs except ID 0) + */ +extern "C" int main_ap(); diff --git a/boot/startup_ap.asm b/boot/startup_ap.asm new file mode 100644 index 0000000..b7bac5c --- /dev/null +++ b/boot/startup_ap.asm @@ -0,0 +1,66 @@ +; Startup of the remaining application processors (in real mode) +; and switching to 'Protected Mode' with a temporary GDT. +; This code is relocated by ApplicationProcessor::relocateSetupCode() + +[SECTION .setup_ap_seg] +[GLOBAL setup_ap_gdt] +[GLOBAL setup_ap_gdtd] + +; Unlike the bootstrap processor, the application processors have not been +; setup by the boot loader -- they start in real mode (16 bit) and have to be +; switched manually to protected mode (32 bit) +[BITS 16] + +setup_ap: + ; Initialize segment register + mov ax, cs ; Code segment and... + mov ds, ax ; .. 
data segment should point to the same segment + ; (we don't use stack / stack segment) + + ; Disable interrupts + cli + ; Disable non maskable interrupts (NMI) + mov al, 0x80 + out 0x70, al + + ; load temporary real mode Global Descriptor Table (GDT) + lgdt [setup_ap_gdtd - setup_ap] + + ; Switch to protected mode: + ; enable protected mode bit (1 << 0) in control register 0 + mov eax, cr0 + or eax, 1 + mov cr0, eax + ; Far jump to 32 bit `startup_ap` function + jmp dword 0x08:startup_ap + +; memory reserved for temporary real mode GDT +; initialized by ApplicationProcessor::relocateSetupCode() +ALIGN 4 +setup_ap_gdt: + dq 0,0,0,0,0 ; reserve memory for at least 5 GDT entries + +; memory reserved for temporary real mode GDT descriptor +; initialized by ApplicationProcessor::relocateSetupCode() +setup_ap_gdtd: + dw 0,0,0,0,0 ; reserve memory for GDT descriptor + +[SECTION .text] + +[BITS 32] + +; Segment initialization defined in `boot/startup.asm` +[EXTERN segment_init] + +; protected mode (32 bit) startup code for application processor +startup_ap: + ; reload all segment selectors (since they still point to the real mode GDT) + mov ax, 0x10 + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + ; Use same segment initialization function as bootstrap processor + jmp segment_init diff --git a/boot/startup_ap.cc b/boot/startup_ap.cc new file mode 100644 index 0000000..3e95dae --- /dev/null +++ b/boot/startup_ap.cc @@ -0,0 +1,83 @@ +#include "startup_ap.h" +#include "utils/string.h" +#include "utils/size.h" +#include "debug/output.h" +#include "debug/assert.h" +#include "machine/lapic.h" +#include "machine/core_interrupt.h" +#include "machine/gdt.h" +#include "machine/pit.h" + +namespace ApplicationProcessor { + +// Make sure that the RELOCATED_SETUP is in low memory (< 1 MiB) +static_assert((RELOCATED_SETUP & ~0x000ff000) == 0, "Not a valid 1 MB address for RELOCATED_SETUP!"); + +/*! 
\brief Temporary Global Descriptor Table
+ *
+ * Blueprint, to be copied into the real mode code
+ */
+constexpr GDT::SegmentDescriptor ap_gdt[] = {
+	// Null descriptor
+	{},
+
+	// code segment
+	{	/* base  = */ 0x0,
+		/* limit = */ 0xFFFFFFFF,
+		/* code  = */ true,
+		/* ring  = */ 0,
+		/* size  = */ GDT::SIZE_32BIT },
+
+	// data segment
+	{	/* base  = */ 0x0,
+		/* limit = */ 0xFFFFFFFF,
+		/* code  = */ false,
+		/* ring  = */ 0,
+		/* size  = */ GDT::SIZE_32BIT },
+};
+
+void relocateSetupCode() {
+	// Relocate the setup code
+	memcpy(reinterpret_cast<void *>(RELOCATED_SETUP), &___SETUP_AP_START__, &___SETUP_AP_END__ - &___SETUP_AP_START__);
+
+	// Adjust GDT:
+	// Calculate offset for real mode GDT and GDT descriptor
+	uintptr_t ap_gdt_offset = reinterpret_cast<uintptr_t>(&setup_ap_gdt) -
+	                          reinterpret_cast<uintptr_t>(&___SETUP_AP_START__);
+	uintptr_t ap_gdtd_offset = reinterpret_cast<uintptr_t>(&setup_ap_gdtd) -
+	                           reinterpret_cast<uintptr_t>(&___SETUP_AP_START__);
+
+	// Copy blueprint of the real mode GDT to the relocated memory
+	void * relocated_ap_gdt = reinterpret_cast<void *>(RELOCATED_SETUP + ap_gdt_offset);
+	memcpy(relocated_ap_gdt, &ap_gdt, sizeof(ap_gdt));
+
+	// Calculate GDT descriptor for the relocated address
+	GDT::Pointer * relocated_ap_gdtd = reinterpret_cast<GDT::Pointer *>(RELOCATED_SETUP + ap_gdtd_offset);
+	relocated_ap_gdtd->set(relocated_ap_gdt, size(ap_gdt));
+}
+
+void boot(void) {
+	assert(!Core::Interrupt::isEnabled() && "Interrupts should not be enabled before APs have booted!");
+
+	// Relocate setup code
+	relocateSetupCode();
+
+	// Calculate Startup-IPI vector based on the address of the relocated setup_ap()
+	uint8_t vector = RELOCATED_SETUP >> 12;
+
+	// Send Init-IPI to all APs
+	LAPIC::IPI::sendInit();
+
+	// wait at least 10 ms
+	PIT::delay(10000);
+
+	// Send Startup-IPI twice
+	DBG_VERBOSE << "Sending STARTUP IPI #1" << endl;
+	LAPIC::IPI::sendStartup(vector);
+	// wait at least 200 us
+	PIT::delay(200);
+
+	DBG_VERBOSE << "Sending STARTUP IPI #2" << endl;
+	LAPIC::IPI::sendStartup(vector);
+}
+}  // namespace ApplicationProcessor
diff --git a/boot/startup_ap.h b/boot/startup_ap.h
new file mode 100644
index 0000000..5a201e9
--- /dev/null
+++ b/boot/startup_ap.h
@@ -0,0 +1,109 @@
+/*! \file
+ * \brief Startup of additional cores, the application processors (APs)
+ */
+
+#pragma once
+
+#include "types.h"
+#include "compiler/fix.h"
+
+/*! \brief Application Processor Boot
+ *
+ * Interface to boot the APs
+ */
+namespace ApplicationProcessor {
+	/*! \brief Address (below 1 MiB) to which the setup code gets relocated
+	 */
+	const uintptr_t RELOCATED_SETUP = 0x40000;
+
+	/*! \brief Relocate the real mode setup code
+	 *
+	 * The application processors (APs) start in real mode, which means that your setup
+	 * code must be placed within the first megabyte -- your operating system resides
+	 * currently at a much higher address (16 MiB), so the code has to be copied
+	 * down there first.
+	 *
+	 * Luckily, the code in `setup_ap()` can be relocated by copying -- because it
+	 * does not use any absolute addressing (except when jumping to the protected
+	 * mode function `startup_ap()`).
+	 * The function must be copied to the address of \ref RELOCATED_SETUP (0x40000),
+	 * so that the APs can start there.
+	 *
+	 * The memory section contains a reserved area for the \ref GDT and its descriptor,
+	 * which has to be assigned first with the contents of \ref ap_gdt.
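+	 *
+	 * (For orientation: \ref boot() later encodes exactly this address in the Startup-IPI
+	 *  vector as `RELOCATED_SETUP >> 12`, i.e. the number of the 4 KiB page at which the
+	 *  APs start executing.)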
+ * + * \note You could also tell the linker script to put the code directly + * at the appropriate place, but unfortunately the Qemu multiboot + * implementation (via `-kernel` parameter) can't handle it properly. + */ + void relocateSetupCode(); + + /*! \brief Boot all application processors + * + * Performs relocation by calling \ref relocateSetupCode() + * + * \see [ISDMv3, 8.4.4.2 Typical AP Initialization Sequence](intel_manual_vol3.pdf#page=276) + */ + void boot(); +} // namespace ApplicationProcessor + +/*! \brief Begin of setup code for application processors + * + * The setup code has to switch from real mode (16 bit) to protected mode (32 bit), + * hence it is written in assembly and must be executed in low memory (< 1 MiB). + * + * After kernel start the code is somewhere above 16 MiB (the bootstrap + * processor was already launched in protected mode by the boot loader). + * Therefore this symbol is required for relocate the code to the position + * specified by \ref ApplicationProcessor::RELOCATED_SETUP. + * + * Luckily, the `setup_ap` code in `boot/startup_ap.asm` is rather simple and + * doesn't depend on absolute addressing -- and is therefore relocatable. + * + * Relocation is done by the function \ref ApplicationProcessor::relocateSetupCode() + * + * The `___SETUP_AP_START__` symbol is defined in the linker script (`compiler/section.ld`) + */ +extern char ___SETUP_AP_START__; + +/*! \brief End of startup code for application processors + * + * This Symbol is defined in the linker script (`compiler/section.ld`) + */ +extern char ___SETUP_AP_END__; + +/*! \brief Memory reserved for a temporary real mode GDT + * within the relocatable memory area of the setup code + */ +extern char setup_ap_gdt; + +/*! \brief Memory reserved for a temporary real mode GDT descriptor + * within the relocatable memory area of the setup code + */ +extern char setup_ap_gdtd; + +/*! \brief Entry point for application processors + * + * Unlike the bootstrap processor, the application processors have not been + * setup by the boot loader -- they start in `Real Mode` (16 bit) and have to be + * switched manually to `Protected Mode` (32 bit). + * This is exactly what this real mode function does, handing over control + * to the (32 bit) function \ref startup_ap() + * + * This code is written is assembly (`boot/startup_ap.asm`) and relocated by + * \ref ApplicationProcessor::relocateSetupCode() during + * \ref ApplicationProcessor::boot() + */ +extern "C" void setup_ap() +ERROR_ON_CALL("The setup function for application processors shall never be called from your code!"); + +/*! \brief Startup for application processors + * \ingroup Startup + * + * This function behaves similar to \ref startup_bsp(): + * Initializes stack pointer, + * switches to long mode + * and calls the C++ \ref kernel_init function + */ +extern "C" void startup_ap() +ERROR_ON_CALL("The startup function for application processors shall never be called from your code!"); diff --git a/compiler/crti.asm b/compiler/crti.asm new file mode 100644 index 0000000..205f2cd --- /dev/null +++ b/compiler/crti.asm @@ -0,0 +1,18 @@ +; C Runtime Objects - Function prologues for the initialization (.init) and termination routines (.fini) required by the runtime libs. +; When developing on Linux, these files are automatically added by the compiler/linker; we, however, are writing an operating system +; and therefore need to add them ourselves. 
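+;
+; Resulting link order (see the link step in the Makefile):
+;   crti.o  crtbegin.o  <kernel objects>  crtend.o  crtn.o
+; crti.asm and crtn.asm provide the prologues and epilogues of _init/_fini, while the
+; compiler-provided crtbegin.o/crtend.o contribute the constructor/destructor bodies in between.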
+ +; C Runtime - beginning (needs to be passed as first element to the linker) +[SECTION .init] +[GLOBAL _init] +_init: + push rbp + mov rbp, rsp + ; The linker will inject the contents of the .init from crtbegin.o here + +[SECTION .fini] +[GLOBAL _fini] +_fini: + push rbp + mov rbp, rsp + ; The linker will inject the contents of the .fini from crtbegin.o here diff --git a/compiler/crtn.asm b/compiler/crtn.asm new file mode 100644 index 0000000..ddf900f --- /dev/null +++ b/compiler/crtn.asm @@ -0,0 +1,12 @@ +; C Runtime Objects - Function prologues for the initialization (.init) and termination routines (.fini) required by the runtime libs. + +; C Runtime - end (needs to be passed as last element to the linker) +[SECTION .init] + ; The linker will inject the contents of the .init from crtend.o here + pop rbp + ret + +[SECTION .fini] + ; The linker will inject the contents of the .fini from crtend.o here + pop rbp + ret diff --git a/compiler/fix.h b/compiler/fix.h new file mode 100644 index 0000000..e4eb161 --- /dev/null +++ b/compiler/fix.h @@ -0,0 +1,21 @@ +/*! \file + * \brief Compiler-dependent fixes & idiosyncrasies + */ + +#pragma once + +#ifdef __clang__ +# define UNUSED_STRUCT_FIELD __attribute__((unused)) +#else +// GCC does not understand this attribute correctly for structures +# define UNUSED_STRUCT_FIELD +#endif + +#if defined(__GNUC__) && !defined(__clang__) +// Only GCC understands the error attribute +# define ERROR_ON_CALL(MSG) __attribute__((error(MSG))); +#else +# define ERROR_ON_CALL(MSG) +#endif + +#define MAYBE_UNUSED __attribute__((unused)) diff --git a/compiler/libc.cc b/compiler/libc.cc new file mode 100644 index 0000000..d0d7e68 --- /dev/null +++ b/compiler/libc.cc @@ -0,0 +1,44 @@ +#include "compiler/libc.h" +#include "types.h" + +/*! \brief Function pointer for initialization/finalization functions for global objects + * required since GCC 4.7 and later. + * + * These symbols appear kind of magically due to the compiler + */ +extern void(*__preinit_array_start[]) (); +extern void(*__preinit_array_end[]) (); +extern void(*__init_array_start[]) (); +extern void(*__init_array_end[]) (); +extern void(*__fini_array_start[]) (); +extern void(*__fini_array_end[]) (); + +extern "C" void _init(); +extern "C" void _fini(); + +namespace CSU { + +void initializer() { + const unsigned int preinit_size = __preinit_array_end - __preinit_array_start; + for (unsigned int i = 0; i != preinit_size; ++i) { + (*__preinit_array_start[i])(); + } + + _init(); + + const size_t size = __init_array_end - __init_array_start; + for (size_t i = 0; i < size; i++) { + (*__init_array_start[i])(); + } +} + +void finalizer() { + const unsigned int fini_size = __fini_array_end - __fini_array_start; + for (unsigned int i = 0; i != fini_size; ++i) { + (*__fini_array_start[i])(); + } + + _fini(); +} + +} // namespace CSU diff --git a/compiler/libc.h b/compiler/libc.h new file mode 100644 index 0000000..014876c --- /dev/null +++ b/compiler/libc.h @@ -0,0 +1,22 @@ +/*! \file + * \brief Initialization functions for global objects required by the compiler + */ + +#pragma once + +/*! \brief C StartUp (CSU) + * required by the compiler and provided by the c standard library + */ +namespace CSU { + +/*! \brief Call global constructors and initialization functions + * (this is usually done by __libc_csu_init) + */ +void initializer(); + +/*! 
\brief Call global destructors and finalizer functions + * (this is usually done by __libc_csu_fini) + */ +void finalizer(); + +} // namespace CSU diff --git a/compiler/libcxx.cc b/compiler/libcxx.cc new file mode 100644 index 0000000..88cd351 --- /dev/null +++ b/compiler/libcxx.cc @@ -0,0 +1,31 @@ +/*! \file + * \brief C++ runtime support functions + */ + +#include "types.h" + +void operator delete(void *ptr) { + (void) ptr; +} + +void operator delete(void *ptr, size_t size) { + (void) ptr; + (void) size; +} + +extern "C" int __cxa_atexit(void (*func)(void *), void * arg, void * dso_handle) { + // Registers a function that will be executed on exit. + // We simply ignore those functions, as we don't need them for our operating systems. + + (void) func; + (void) arg; + (void) dso_handle; + + return 0; +} + +extern "C" [[noreturn]] void __cxa_pure_virtual() { + // Pure virtual function was called -- this if obviously not valid, + // therefore we wait infinitely. + while (true) {} +} diff --git a/compiler/sections.ld b/compiler/sections.ld new file mode 100644 index 0000000..a0ad7e5 --- /dev/null +++ b/compiler/sections.ld @@ -0,0 +1,119 @@ +/* Entry in our OS -- label 'startup_bsp' in file boot/startup.asm */ +ENTRY(startup_bsp) + +SECTIONS + { + /* start address of our kernel */ + . = 16M; + + /* This is a linker script defined "variable" (without value -- therefore + * better to be considered as a symbol), which can be referenced in the C++ + * source code using `&___KERNEL_START___` (mind the reference operator!) + * to get a pointer to the current (virtual) memory position. + * However, a previous declaration with C linkage is required, e.g. + * extern "C" void * ___KERNEL_START___; + * For more information have a look at + * https://sourceware.org/binutils/docs/ld/Source-Code-Reference.html + */ + ___KERNEL_START___ = .; + + .boot : + { + /* Multiboot Header should be at the very beginning of the binary */ + *(.multiboot_header) + } + + /* The (executable) machine code */ + .text : + { + *(.text .text.* .gnu.linkonce.t.*) + *(.init) + *(.fini) + } + + /* Align to the next page border + * allowing the linker to mark the section above as [read and] executable + * while the following sections is just read-only. */ + . = ALIGN(0x1000); + + /* Start for application processors, relocated by APIC::init() + * to a below 1 MB address to boot from real mode. + * + * Please note: + * It is possible to let the linker place it at a below 1 MB address, + * while all the rest starts at 16 MB. This will work for multiboot + * compliant boot loader like GRUB and PXELINUX, however, + * the qemu boot loader cannot handle such ELF files (yet)... + * That's why we have to copy it ourself in the kernel */ + .setup_ap_seg ALIGN(0x10) : + { + ___SETUP_AP_START__ = .; + *(.setup_ap_seg) + } + ___SETUP_AP_END__ = .; + + /* Read-only data */ + .rodata : + { + *(.rodata .rodata.* .gnu.linkonce.r.*) + KEEP(*(.note.gnu.build-id)) + } + + /* Align to the next page border + * allowing the linker to mark the section below as [read and] writeable */ + . 
= ALIGN(0x1000); + + /* lists containing the start address of global constructors and destructors (generated by the compiler) */ + .preinit_array : + { + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + } + .init_array : + { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + } + .fini_array : + { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + } + + /* the data section containing initialized static variables + * (writeable with a value not zero) */ + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + /* Global offset table */ + *(.got*) + /* Exception Handling */ + *(.eh_frame*) + } + + /* the bss (block starting symbol) section containing uninitialized + * static variables (writeable with an initial value of zero); + * this section does not consume any memory in the binary */ + .bss : + { + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + } + + ___KERNEL_END___ = .; + + /* Sections which should not be included in the binary */ + /DISCARD/ : + { + *(.note) + *(.comment) + /* Debug information (commented out to keep it) + *(.debug*) + */ + } + } diff --git a/debug/assert.cc b/debug/assert.cc new file mode 100644 index 0000000..7c3127f --- /dev/null +++ b/debug/assert.cc @@ -0,0 +1,12 @@ +#include "assert.h" + +[[noreturn]] void assertion_failed(const char * exp, const char * func, const char * file, int line) { + (void) exp; + (void) func; + (void) file; + (void) line; + // TODO: Print error message (in debug window) + // TODO: Then stop the current core permanently + // Use appropriate method from class Core to do so. + while(true) {} // wait forever +} diff --git a/debug/assert.h b/debug/assert.h new file mode 100644 index 0000000..8bee097 --- /dev/null +++ b/debug/assert.h @@ -0,0 +1,63 @@ +/*! \file + * \brief Contains several macros usable for making assertions + * + * Depending on the type of assertion (either static or at runtime), a failing assertion will trigger an error. + * For static assertion, this error will be shown at compile time and abort compilation. + * Runtime assertions will trigger a message containing details about the error occurred and will make the CPU die. + */ + +#pragma once + +#ifndef STRINGIFY +/*! \def STRINGIFY(S) + * \brief Converts a macro parameter into a string + * \ingroup debug + * \param S Expression to be converted + * \return stringified version of S + */ +#define STRINGIFY(S) #S +#endif + +/*! \def assert_size(TYPE, SIZE) + * \brief Statically ensure (at compile time) that a data type (or variable) has the expected size. + * \ingroup debug + * \param TYPE The type to be checked + * \param SIZE Expected size in bytes + */ +#define assert_size(TYPE, SIZE) \ + static_assert(sizeof(TYPE) == (SIZE), "Wrong size for " STRINGIFY(TYPE)) + +/*! \def assert(EXP) + * \brief Ensure (at execution time) an expression evaluates to `true`, print an error message and stop the CPU otherwise. + * \ingroup debug + * \param EXP The expression to be checked + */ +#ifdef NDEBUG +#define assert(EXP) \ + do { \ + (void)sizeof(EXP); \ + } while (false) +#else +#define assert(EXP) \ + do { \ + if (__builtin_expect(!(EXP), 0)) { \ + assertion_failed(STRINGIFY(EXP), __func__, __FILE__, __LINE__); \ + } \ + } while (false) + +/*! 
\brief Handles a failed assertion + * + * This function will print a message containing further information about the + * failed assertion and stops the current CPU permanently. + * + * \note This function should never be called directly, but only via the macro `assert`. + * + * \todo Implement Remainder of Method (output & CPU stopping) + * + * \param exp Expression that did not hold + * \param func Name of the function in which the assertion failed + * \param file Name of the file in which the assertion failed + * \param line Line in which the assertion failed + */ +[[noreturn]] void assertion_failed(const char * exp, const char * func, const char * file, int line); +#endif diff --git a/debug/kernelpanic.h b/debug/kernelpanic.h new file mode 100644 index 0000000..fa75025 --- /dev/null +++ b/debug/kernelpanic.h @@ -0,0 +1,22 @@ +/*! \file + * \brief Macro to print an error message and stop the current core. + */ + +#pragma once + +/*! \def kernelpanic + * \brief Print an error message in the debug window and \ref Core::die "stop the current core" + * \ingroup debug + * \param MSG error message + */ +#define kernelpanic(MSG) \ + do { \ + DBG << "PANIC: '" << (MSG) << "' in " << __func__ \ + << " @ " << __FILE__ << ":" << __LINE__ \ + << flush; \ + Core::die(); \ + } while (0) + +// The includes are intentionally placed at the end, so the macro can be used inside those included files as well. +#include "debug/output.h" +#include "machine/core.h" diff --git a/debug/nullstream.cc b/debug/nullstream.cc new file mode 100644 index 0000000..8213370 --- /dev/null +++ b/debug/nullstream.cc @@ -0,0 +1,4 @@ +#include "debug/nullstream.h" + +// Instance +NullStream nullstream; diff --git a/debug/nullstream.h b/debug/nullstream.h new file mode 100644 index 0000000..220bd1a --- /dev/null +++ b/debug/nullstream.h @@ -0,0 +1,39 @@ +/*! \file + * \brief \ref NullStream is a stream discarding everything + */ + +#pragma once + +#include "object/outputstream.h" + +/*! \brief Ignore all data passed by the stream operator + * \ingroup io + * + * Can be used instead of the \ref OutputStream if (for debugging reasons) all + * output should be ignored, e.g. for \ref DBG_VERBOSE + * + * By using template programming, a single generic methods is sufficient + * (which simply discard everything). + */ +class NullStream { + public: + /*! \brief Empty default constructor + */ + NullStream() {} + + /*! \brief Generic stream operator for any data type + * + * Uses template meta programming for a generic & short solution + * + * \tparam T Type of data to ignore + * \param value data to be ignore + * \return Reference to the \ref NullStream object allowing concatenation of operators + */ + template + NullStream& operator << (T value) { + (void) value; + return *this; + } +}; + +extern NullStream nullstream; diff --git a/debug/output.h b/debug/output.h new file mode 100644 index 0000000..7278abb --- /dev/null +++ b/debug/output.h @@ -0,0 +1,65 @@ +/*! \file + * \brief Debug macros enabling debug output on a separate window for each core. + */ + +#pragma once + +/*! 
\def DBG_VERBOSE + * \brief An output stream, which is only displayed in the debug window in verbose mode + * + * \note If a serial console has been implemented, the output can be redirected + * to the serial stream instead (by changing the macro) -- this makes the + * (usually) very large output more readable (since it allows scrolling back) + */ +#ifdef VERBOSE +// If VERBOSE is defined, forward everything to \ref DBG +#define DBG_VERBOSE DBG +#else +// Otherwise sent everything to the NullStream (which will simply discard everything) +#define DBG_VERBOSE nullstream +// in this case we have to include the null stream +#include "debug/nullstream.h" +#endif + +/*! \def DBG + * \brief An output stream, which is displayed in the debug window of the core it was executed on + * + * In single core (\OOStuBS) this is just an alias to the debug window object + * `dout`. + * However, on a multi core system a debug window for each core is + * required, therefore `dout` has to be an \ref TextStream object array with the + * core ID as array index -- the selection is done via Core::getID() + * + */ +#define DBG dout[Core::getID()] + +#include "device/textstream.h" +#include "machine/core.h" + +/*! \brief Debug window + * + * Debug output using \ref DBG like + * `DBG << "var = " << var << endl` + * should be displayed in window dedicated to the core it is executed on. + * + * While this is quite easy on single core systems like \OOStuBS -- they only + * require a single \ref TextStream object called `dout` -- multi core systems + * like \MPStuBS need an object array with one window per core. + * In the latter case direct list initialization can be used: + * + * \code{.cpp} + * TextStream dout[Core::MAX]{ + * {0, 40, 17, 21}, // Debug window for core 0, like TextStream(0, 40, 17, 21) + * {40, 80, 17, 21}, // Debug window for core 1, like TextStream(40, 80, 17, 21) + * //... + * }; + * \endcode + * + * The debug windows in should be located right below the normal output window + * without any overlap and should be able to display at least 3 lines. + * In \MPStuBS, two windows can be placed side-by-side, having 40 columns each. + * + * \todo Define `dout` + */ +extern TextStream dout[Core::MAX]; + diff --git a/device/serialstream.cc b/device/serialstream.cc new file mode 100644 index 0000000..b330a67 --- /dev/null +++ b/device/serialstream.cc @@ -0,0 +1,40 @@ +#include "device/serialstream.h" +#include "debug/output.h" + +SerialStream::SerialStream(ComPort port, BaudRate baud_rate, DataBits data_bits, StopBits stop_bits, Parity parity) + : Serial(port, baud_rate, data_bits, stop_bits, parity) {} + +void SerialStream::flush() { +} + +void SerialStream::setForeground(Color c) { + (void) c; +} + +void SerialStream::setBackground(Color c) { + (void) c; +} + +void SerialStream::setAttribute(Attrib a) { + (void) a; +} + +void SerialStream::reset() { +} + +void SerialStream::setPos(int x, int y) { + (void) x; + (void) y; +} + +bool SerialStream::getPos(int &x, int &y) { + (void) x; + (void) y; + return false; +} + +void SerialStream::print(char* str, int length) { + (void) str; + (void) length; +} + diff --git a/device/serialstream.h b/device/serialstream.h new file mode 100644 index 0000000..8a4423c --- /dev/null +++ b/device/serialstream.h @@ -0,0 +1,163 @@ +/*! \file + * \brief \ref Serial \ref SerialStream "output stream" + */ + +#pragma once + +#include "object/outputstream.h" +#include "machine/serial.h" + +/*! \brief Console (VT100 compatible) via \ref Serial interface. 
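+ *
+ * For orientation, a few common VT100/ANSI escape sequences that the methods below
+ * can build on (assuming a standard terminal emulator): `\e[<n>m` selects attributes
+ * and colors (`30`-`37` foreground, `40`-`47` background, `0` reset), `\e[<row>;<col>H`
+ * moves the cursor, `\e[2J` clears the screen and `\e[6n` requests a cursor position report.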
+ * \ingroup io + * + * This class allows to connect a VT100-compatible display terminal via + * the serial interface. + * + * The utility 'screen' can be used to attach a terminal to an interface + * at a specified connection speed: `screen /dev/ttyS0 115200` + * + * Color and position can be adjusted with the help of + * [escape codes](https://web.archive.org/web/20210126100003/https://ascii-table.com/ansi-escape-sequences-vt-100.php). + */ + +class SerialStream : public OutputStream, public Serial { + /*! \brief Helper to send a multi-digit number as human readable ASCII characters + * \param num Number to send + */ + void write_number(int num); + + public: + /*! \brief Attributes + * can be used to influence the display of the output. + * + * \note The attributes might not be supported or have a different effect + * depending on the terminal emulator! + */ + enum Attrib { + RESET = 0, ///< Turn off character attributes + BRIGHT = 1, ///< Bold + DIM = 2, ///< Low intensity (dimmed) + UNDERSCORE = 4, ///< Underline + BLINK = 5, ///< Blink (slow) + REVERSE = 7, ///< Swap fore & background + HIDDEN = 8, ///< Concealed + }; + + /*! \brief Color codes + * + * Default VT100 supports eight colors for both foreground and background + * (later versions 256 [8 bit] and even true color [32 bit]). + * The actual color is affected by the attributes and can look significantly + * different depending on the terminal emulator. + */ + enum Color { + BLACK = 0, + RED = 1, + GREEN = 2, + YELLOW = 3, + BLUE = 4, + MAGENTA = 5, + CYAN = 6, + WHITE = 7 + }; + + /*! \brief Constructor for the VT100-compatible console + * + * Sets up the serial connection as well + * + * \todo Implement Method + */ + explicit SerialStream(ComPort port = COM1, BaudRate baud_rate = BAUD_115200, DataBits data_bits = DATA_8BIT, + StopBits stop_bits = STOP_1BIT, Parity parity = PARITY_NONE); + + /*! \brief Method to output the buffer contents of the base class \ref Stringbuffer + * + * The method is automatically called when the buffer is full, + * but can also be called explicitly to force output of the current buffer. + * + * \todo Implement Method + */ + void flush(); + + /*! \brief Change foreground color (for subsequent output) + * + * \todo Implement Method + * + * \param c Color + */ + void setForeground(Color c); + + /*! \brief Change background color (for subsequent output) + * + * \todo Implement Method + * + * \param c Color + */ + void setBackground(Color c); + + /*! \brief Change text attribute (for subsequent output) + * + * \todo Implement Method + * + * \param a Attribute + */ + void setAttribute(Attrib a); + + /*! \brief Reset terminal + * + * Clear screen, place cursor at the beginning and reset colors + * and attributes to the default value. + * + * \opt Implement Method + */ + void reset(); + + /*! \brief Set the cursor position + * + * \param x Column in window + * \param y Row in window + * + * \opt Implement Method + */ + void setPos(int x, int y); + + /*! \brief Read the current cursor position + * + * It is possible to receive the current cursor position via a special + * escape code: Request by sending `\e[6n`, answer will be `\e[y;xR` with + * `y` (row) and `x` (column) as human readable ASCII character number. + * + * However, depending on the implementation, it may be possible that the + * system waits endlessly due to an disconnected terminal or data + * transmission error. 
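As a standalone sketch of the escape sequences mentioned above (the helper names are invented for illustration only; `SerialStream` would emit the same bytes through its `Serial` base class and use `write_number()` instead of `snprintf`): foreground colors map to `ESC[3<color>m`, background colors to `ESC[4<color>m`, and the cursor is placed with `ESC[<row>;<column>H`.

```cpp
#include <cstdio>

// Standard VT100/ANSI sequences -- host-side illustration, not kernel code.
int format_foreground(char *buf, std::size_t len, int color) {
    return std::snprintf(buf, len, "\033[3%dm", color);   // e.g. RED (1) -> "\033[31m"
}

int format_background(char *buf, std::size_t len, int color) {
    return std::snprintf(buf, len, "\033[4%dm", color);   // e.g. BLUE (4) -> "\033[44m"
}

int format_position(char *buf, std::size_t len, int x, int y) {
    // VT100 coordinates are 1-based; x/y are assumed to be 0-based window coordinates
    return std::snprintf(buf, len, "\033[%d;%dH", y + 1, x + 1);
}
```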
+ * + * \param x Column in window + * \param y Row in window + * \return `true` if position was successfully received + * + * \opt Implement Method + */ + bool getPos(int &x, int &y); + + /*! \brief Display multiple characters in the window starting at the current cursor position + * + * This method can be used to output a string, starting at the current cursor + * position. Since the string does not need to contain a '\0' termination + * (as it is usually the case in C), the parameter `length` is required to + * specify the number of characters in the string. + * + * The text is displayed using the previously configured + * \ref setAttribute() "attributes", \ref setForeground() "fore-" + * and \ref setBackground "background" color. + * + * A line break will occur wherever the character `\n` is inserted + * in the text to be output (for compatibility reasons a `\r` is + * automatically appended). + * + * \todo Implement Method + * \param str String to output + * \param length length of string + */ + void print(char* str, int length); + +}; diff --git a/device/textstream.cc b/device/textstream.cc new file mode 100644 index 0000000..0edb753 --- /dev/null +++ b/device/textstream.cc @@ -0,0 +1,2 @@ +#include "device/textstream.h" + diff --git a/device/textstream.h b/device/textstream.h new file mode 100644 index 0000000..05d2b3d --- /dev/null +++ b/device/textstream.h @@ -0,0 +1,46 @@ +/*! \file + * \brief \ref TextStream outputs text onto the screen in \ref TextMode + */ + +/*! \defgroup io I/O subsystem + * \brief The input/output subsystem + */ + +#pragma once + +#include "object/outputstream.h" +#include "machine/textwindow.h" + +/*! \brief Output text (form different data type sources) on screen in text mode + * \ingroup io + * + * Allows the output of different data types as strings on the \ref TextMode + * screen of a PC. + * To achieve this, \ref TextStream is derived from both \ref OutputStream and + * \ref TextWindow and only implements the method \ref TextStream::flush(). + * Further formatting or special effects are implemented in \ref TextWindow. + */ +class TextStream { + // Prevent copies and assignments + TextStream(const TextStream&) = delete; + TextStream& operator=(const TextStream&) = delete; + public: + /// \copydoc TextWindow::TextWindow(unsigned,unsigned,unsigned,unsigned,bool) + TextStream(unsigned from_col, unsigned to_col, unsigned from_row, unsigned to_row, bool use_cursor = false) { + (void) from_col; + (void) to_col; + (void) from_row; + (void) to_row; + (void) use_cursor; + } + + /*! \brief Output the buffer contents of the base class \ref Stringbuffer + * + * The method is automatically called when the buffer is full, + * but can also be called explicitly to force output of the current buffer. + * + * + * \todo Implement method + */ + void flush(); +}; diff --git a/interrupt/handler.asm b/interrupt/handler.asm new file mode 100644 index 0000000..40d6b89 --- /dev/null +++ b/interrupt/handler.asm @@ -0,0 +1,120 @@ +; Low-level stuff required for interrupt handling +; The "actual" code to be executed is implemented in the C function "interrupt_handler" + +[EXTERN interrupt_handler] + +[SECTION .text] + +; Entry function for interrupt handling (of any vector) + +; The interrupt handling of each vector has to be started in assembler to store +; the scratch registers (see SystemV calling conventions) before the actual +; high-level (C++) interrupt_handler function can be executed. 
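A minimal sketch of the newline handling described for `SerialStream::print()` above, kept independent of the actual hardware access (the `transmit` callback is a placeholder for whatever transmit routine the `Serial` base class provides):

```cpp
// For every '\n' an additional '\r' is sent so that a VT100 terminal
// returns to the beginning of the next line.
void print_with_crlf(const char *str, int length, void (*transmit)(char)) {
    for (int i = 0; i < length; ++i) {
        transmit(str[i]);
        if (str[i] == '\n') {
            transmit('\r');
        }
    }
}
```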
+; +; For this purpose we use a macro to generate a customized entry function for +; each interrupt vector (0-255), wheres the vector itself is the first parameter. +; The second parameter is a boolean flag indicating whether an error code is +; placed on the stack for the corresponding trap (by the CPU). +; +; Usage: IRQ +%macro IRQ 2 +ALIGN 16 +interrupt_entry_%1: + %if %2 == 0 + ; If the vector is not a trap with an error code automatically pushed + ; on the stack, the entry function pushes a zero instead + ; to retain an identical stack layout in each case. + push 0 + %endif + + ; The interrupt may be triggered asynchronously, therefore the whole context + ; has to be saved and restored, or the interrupted code might not be able to + ; continue. The C++ compiler will only generates code to preserve + ; non-scratch registers in the high-level interrupt handler -- the scratch + ; registers have to be saved (and restored later) manually! + push rax + push rcx + push rdx + push rsi + push rdi + push r8 + push r9 + push r10 + push r11 + + ; Clear direction flag for string operations + cld + + ; Assign vector as first parameter for the high-level interrupt handler + mov rdi, %1 + + ; Assign pointer to the context (= interrupt stack) as second parameter + mov rsi, rsp + + ; Call the high-level interrupt handler routine + call interrupt_handler + + ; Restore scratch registers + pop r11 + pop r10 + pop r9 + pop r8 + pop rdi + pop rsi + pop rdx + pop rcx + pop rax + + ; Drop error code (or the fake zero value) + add rsp, 8 + + ; Return from interrupt + iretq +%endmacro + +; For traps the CPU sometimes pushes an error code onto the stack. +; These vectors are documented in the manual (or instead use the +; [osdev wiki](https://wiki.osdev.org/Exceptions) for a handy list). +; Therefore we manually call the macro for the corresponding traps. +IRQ 0, 0 +IRQ 1, 0 +IRQ 2, 0 +IRQ 3, 0 +IRQ 4, 0 +IRQ 5, 0 +IRQ 6, 0 +IRQ 7, 0 +IRQ 8, 1 +IRQ 9, 0 +IRQ 10, 1 +IRQ 11, 1 +IRQ 12, 1 +IRQ 13, 1 +IRQ 14, 1 +IRQ 15, 0 +IRQ 16, 0 +IRQ 17, 1 + +; All subsequent interrupts (18 - 255) have no error code, +; therefore we use a loop calling the macro. +%assign i 18 +%rep 238 +IRQ i, 0 +%assign i i+1 +%endrep + +[SECTION .data] + +; Create a function pointer array for each interrupt entry +; (to be used in C++ for IDT::handle) +[GLOBAL interrupt_entry] + +interrupt_entry: +%macro interrupt_vector 1 + dq interrupt_entry_%1 +%endmacro +%assign i 0 +%rep 256 + interrupt_vector i + %assign i i+1 +%endrep diff --git a/interrupt/handler.cc b/interrupt/handler.cc new file mode 100644 index 0000000..05a83a2 --- /dev/null +++ b/interrupt/handler.cc @@ -0,0 +1,7 @@ +#include "interrupt/handler.h" + +extern "C" void interrupt_handler(Core::Interrupt::Vector vector, InterruptContext *context) { + (void) vector; + (void) context; + +} diff --git a/interrupt/handler.h b/interrupt/handler.h new file mode 100644 index 0000000..031a3d4 --- /dev/null +++ b/interrupt/handler.h @@ -0,0 +1,81 @@ +/*! \file + * \brief \ref interrupt_handler() Interrupt handler + */ +#pragma once + +#include "types.h" +#include "machine/core_interrupt.h" + +/*! \defgroup interrupts Interrupt Handling + * \brief The interrupt subsystem + * + * The interrupt subsystem of StubBS contains all functionality to accept + * interrupts from the hardware and process them. + * In later exercises the interrupts will enable applications to + * execute core functionality (system calls). 
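A hypothetical body for the (still empty) `interrupt_handler()` stub shown above, not the official solution: report the vector in the debug window of the current core and treat exceptions (vectors below `Core::Interrupt::EXCEPTIONS`) as fatal via `kernelpanic`. Device dispatch and interrupt acknowledgement are beyond this sketch.

```cpp
#include "interrupt/handler.h"
#include "machine/core_interrupt.h"
#include "debug/output.h"
#include "debug/kernelpanic.h"

extern "C" void interrupt_handler(Core::Interrupt::Vector vector, InterruptContext *context) {
    DBG << "interrupt vector " << static_cast<unsigned>(vector)
        << " at ip " << reinterpret_cast<void*>(context->ip) << endl;

    if (vector < Core::Interrupt::EXCEPTIONS) {
        // Traps and faults are not recoverable in this sketch
        kernelpanic("unhandled exception");
    }
}
```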
+ * The entry point for the interrupt subsystem is the function + * 'interrupt_entry_VECTOR' (in `interrupt/handler.asm`). + */ + +/*! \brief Preserved interrupt context + * + * After an interrupt was triggered, the core first saves the basic context + * (current code- & stack segment, instruction & stack pointer and the status + * flags register) and looks up the handling function for the vector using the + * \ref IDT. In StuBS for each vector an own `interrupt_entry_VECTOR` function + * (written in assembly in `interrupt/handler.asm`) was registered during boot + * by \ref kernel_init(), which all save the scratch registers on the stack + * before calling the C++ function \ref interrupt_handler(). + * The high-level handler gets a pointer to the part of the stack which + * corresponds to the \ref InterruptContext structure as second parameter. + * After returning from the high-level handler, the previous state is restored + * from this context (scratch register in assembly and basic context while + * executing `iret`) so it can continue transparently at the previous position. + */ +struct InterruptContext { + // Scratch register (stored by `interrupt/handler.asm`) + uintptr_t r11; ///< scratch register R11 + uintptr_t r10; ///< scratch register R10 + uintptr_t r9; ///< scratch register R9 + uintptr_t r8; ///< scratch register R8 + uintptr_t rdi; ///< scratch register RDI + uintptr_t rsi; ///< scratch register RSI + uintptr_t rdx; ///< scratch register RDX + uintptr_t rcx; ///< scratch register RCX + uintptr_t rax; ///< scratch register RAX + + // Context saved by CPU + uintptr_t error_code; ///< Error Code + uintptr_t ip; ///< Instruction Pointer (at interrupt) + uintptr_t cs : 16; ///< Code segment (in case of a ring switch it is the segment of the user mode) + uintptr_t : 0; ///< Alignment (due to 16 bit code segment) + uintptr_t flags; ///< Status flags register + uintptr_t sp; ///< Stack pointer (at interrupt) + uintptr_t ss : 16; ///< Stack segment (in case of a ring switch it is the segment of the user mode) + uintptr_t : 0; ///< Alignment (due to 16 bit stack segment) +} __attribute__((packed)); + +/*! \brief High-Level Interrupt Handling. + * \ingroup interrupts + * + * Main interrupt handling routine of the system. + * This function is called by the corresponding `interrupt_entry_VECTOR` + * function (located in `interrupt/handler.asm`) with disabled interrupts. + * + * \param vector number of the interrupt + * \param context Pointer to interrupt context (on stack). + * + */ +extern "C" void interrupt_handler(Core::Interrupt::Vector vector, InterruptContext *context); + +/*! \brief Array of function pointer to the default low-level interrupt handlers + * + * The index corresponds to the vectors entry function, e.g. `interrupt_entry[6]` + * points to `interrupt_entry_6`, handling the trap for + * \ref Core::Interrupt::INVALID_OPCODE "invalid opcode". + * + * The entry functions and this array are defined in assembly in + * `interrupt/handler.asm` and used in \ref kernel_init() to + * initialize the \ref IDT "Interrupt Descriptor Table (IDT)". 
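The registration described above could look roughly like this in `kernel_init()`. The full parameter list of `IDT::handle()` (gate type, size, privilege level, present flag) is not part of this excerpt, so the sketch assumes suitable default arguments exist:

```cpp
#include "interrupt/handler.h"
#include "machine/idt.h"
#include "machine/core_interrupt.h"

void setup_interrupts() {
    // Register the assembly entry function for every vector ...
    for (unsigned vector = 0; vector < Core::Interrupt::VECTORS; ++vector) {
        IDT::handle(vector, interrupt_entry[vector]);
    }
    // ... and activate the table on this core (lidt)
    IDT::load();
}
```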
+ */ +extern "C" void * const interrupt_entry[]; diff --git a/machine/acpi.cc b/machine/acpi.cc new file mode 100644 index 0000000..ef9faa6 --- /dev/null +++ b/machine/acpi.cc @@ -0,0 +1,139 @@ +#include "machine/acpi.h" +#include "debug/output.h" + +namespace ACPI { + +static RSDP* rsdp = 0; +static RSDT* rsdt = 0; +static XSDT* xsdt = 0; + +const char * RSDP_SIGNATURE = "RSD PTR "; + +static int checksum(const void *pos, unsigned len) { + const uint8_t *mem = reinterpret_cast(pos); + uint8_t sum = 0; + for (unsigned i = 0; i < len; i++) { + sum += mem[i]; + } + + return sum; +} + +static const RSDP* findRSDP(const void *pos, unsigned len) { + /* since the RSDP is 16-Byte aligned, we only need to check + every second 64bit memory block */ + for (unsigned block = 0; block < len / 8; block += 2) { + const uint64_t *mem = reinterpret_cast(pos) + block; + if (*mem == *reinterpret_cast(RSDP_SIGNATURE)) { + const RSDP *rsdp = reinterpret_cast(mem); + /* ACPI Specification Revision 4.0a: 5.2.5.3*/ + if ((rsdp->revision == 0 && checksum(mem, 20) == 0) || + (rsdp->length > 20 && checksum(mem, rsdp->length) == 0)) { + return rsdp; + } + } + } + return 0; +} + +bool init() { + /* ACPI Specification Revision 4.0a: + * 5.2.5.1 Finding the RSDP on IA-PC Systems + * OSPM finds the Root System Description Pointer (RSDP) structure by + * searching physical memory ranges on 16-byte boundaries for a valid + * Root System Description Pointer structure signature and checksum + * match as follows: + * * The first 1 KB of the Extended BIOS Data Area (EBDA). For EISA or + * MCA systems, the EBDA can be found in the two-byte location 40:0Eh + * on the BIOS data area. + * * The BIOS read-only memory space between 0E0000h and 0FFFFFh. + */ + volatile uintptr_t ebda_base_address = 0x40e; + const uintptr_t ebda = static_cast(*reinterpret_cast(ebda_base_address)); + const RSDP *rsdp = findRSDP(reinterpret_cast(ebda), 1024); + if (rsdp == nullptr) { + rsdp = findRSDP(reinterpret_cast(0xe0000), 0xfffff-0xe0000); + } + + if (rsdp == nullptr) { + DBG_VERBOSE << "No ACPI!" << endl; + return false; + } + rsdt = reinterpret_cast(static_cast(rsdp->rsdtaddress)); + + /* If the XSDT is present we must use it; see: + * ACPI Specification Revision 4.0a: + * "An ACPI-compatible OS must use the XSDT if present." + */ + if (rsdp->revision != 0 && rsdp->length >= 36) { + xsdt = reinterpret_cast(rsdp->xsdtaddress); + } + DBG_VERBOSE << "ACPI revision " << rsdp->revision << endl; + for (unsigned i = 0; i != count(); ++i) { + SDTH *sdt = get(i); + if (sdt != nullptr) { + char *c = reinterpret_cast(&sdt->signature); + DBG_VERBOSE << i << ". 
" << c[0] << c[1] << c[2] << c[3] << " @ " << reinterpret_cast(sdt) << endl; + } + } + return true; +} + +unsigned count() { + if (xsdt != nullptr) { + return (xsdt->length-36)/8; + } else if (rsdt != nullptr) { + return (rsdt->length-36)/4; + } else { + return 0; + } +} + +SDTH *get(unsigned num) { + if (xsdt != nullptr) { + SDTH *entry = reinterpret_cast(xsdt->entries[num]); + if (checksum(entry, entry->length) == 0) { + return entry; + } + } else if (rsdt != nullptr) { + SDTH *entry = reinterpret_cast(static_cast(rsdt->entries[num])); + if (checksum(entry, entry->length) == 0) { + return entry; + } + } + return 0; +} + +SDTH *get(char a, char b, char c, char d) { + union { + char signature[4]; + uint32_t value; + }; + signature[0] = a; + signature[1] = b; + signature[2] = c; + signature[3] = d; + + if (xsdt != nullptr) { + for (unsigned i = 0; i < count(); i++) { + SDTH *entry = reinterpret_cast(xsdt->entries[i]); + if (entry->signature == value && checksum(entry, entry->length) == 0) { + return entry; + } + } + } else if (rsdt != nullptr) { + for (unsigned i = 0; i < count(); i++) { + SDTH *entry = reinterpret_cast(static_cast(rsdt->entries[i])); + if (entry->signature == value && checksum(entry, entry->length) == 0) { + return entry; + } + } + } + return 0; +} + +int revision() { + return rsdp != nullptr ? rsdp->revision : -1; +} + +} // namespace ACPI diff --git a/machine/acpi.h b/machine/acpi.h new file mode 100644 index 0000000..a8259d0 --- /dev/null +++ b/machine/acpi.h @@ -0,0 +1,257 @@ +/*! \file + * \brief Structs and methods related to the \ref ACPI "Advanced Configuration and Power Interface (ACPI)" + */ + +#pragma once +#include "types.h" + +/*! \brief Abstracts the ACPI standard that provides interfaces for hardware detection, device configuration, + * and energy management. + * \ingroup io + * + * ACPI is the successor to APM (Advanced Power Management), aiming to give the operating system more control + * over the hardware. This extended control, for instance, enables the operating system to assign a particular amount + * of energy to every device (e.g., by disabling a device or changing to standby mode). + * For this purpose, BIOS and chipset provide a set of tables that describe the system and its components and provide + * routines the OS can call. + * These tables contain details about the system, such as the number of CPU cores and the LAPIC/IOAPIC, which are + * determined during system boot. + */ + +namespace ACPI { + +/*! \brief Root System Description Pointer (RSDP) + * + * The first step to using ACPI is finding the RSDP that is used to find the RSDT / XSDT, which themselves + * contain pointers to even more tables. + * + * On UEFI systems, the RSDP can be found in the EFI_SYSTEM_TABLE; for non-UEFI systems we have to search for the + * signature 'RSD PTR ' in the EBDA (Extended Bios Data Area) or in the memory area up to `FFFFFh`. + * + * \see [ACPI-Specification 5.2.5.3; Root System Description Pointer (RSDP) Structure](acpi.pdf#page=161) + */ + +struct RSDP { + char signature[8]; /* must exactly be equal to 'RSD PTR ' */ + uint8_t checksum; + char oemid[6]; + uint8_t revision; /* specifies the ACPI version */ + uint32_t rsdtaddress; /* physical address of the RSDT */ + uint32_t length; + uint64_t xsdtaddress; /* physical address of the XSDT */ + uint8_t extended_checksum; + uint8_t reserved[3]; +} __attribute__((packed)); + +/*! 
\brief System Description Table Header (SDTH) + * + * All System Description Tables (e.g., the RSDT) contain the same entries at the very beginning of + * the structure, which are abstracted in the SDTH. + * + * \see [ACPI-Specification 5.2.6; System Description Table Header](acpi.pdf#page=162) + */ +struct SDTH { + uint32_t signature; /* table id */ + uint32_t length; + uint8_t revision; + uint8_t checksum; + char oemid[6]; + char oem_table_id[8]; + uint32_t oem_revision; + uint32_t creator_id; + uint32_t creator_revision; + + /* \brief Helper method + * \return Pointer to the end of the table + */ + void *end() { + return reinterpret_cast(this)+length; + } +} __attribute__((packed)); + +/*! \brief Root System Description Table (RSDT) + * + * The RSDT can be found in the RSDP. The RSDT contains physical addresses of all other System Description Tables, + * for example the MADT. + * + * \see [ACPI-Specification 5.2.7; Root System Description Table (RSDT)](acpi.pdf#page=167) + */ + +struct RSDT : SDTH { + uint32_t entries[]; +} __attribute__((packed)); + +/*! \brief Extended System Description Table (XSDT) + * + * Like RSDT, but contains 64-bit instead of 32-bit addresses. + * + * \see [ACPI-Specification 5.2.8; Extended System Description Table (XSDT)](acpi.pdf#page=168) + */ + +struct XSDT : SDTH { + uint64_t entries[]; +} __attribute__((packed)); + +/*! \brief Helper structure + * + * Is used for accessing the substructures present in SRAT / MADT. + * + */ +struct SubHeader { + uint8_t type; + uint8_t length; + + /* Method to traverse multiple substructures */ + SubHeader *next() { + return reinterpret_cast(reinterpret_cast(this)+length); + } +} __attribute__((packed)); + +/*! \brief Multiple APIC Description Table (MADT) + * + * Describes all interrupt controllers present within the system. Is used to obtain the IDs of the APICs, along with + * the number of available processor cores. + * + * \see [ACPI-Specification 5.2.12; Multiple APIC Description Table (MADT)](acpi.pdf#page=193) + */ +struct MADT : SDTH { + uint32_t local_apic_address; + uint32_t flags_pcat_compat:1, + flags_reserved:31; + + /* method to access the first subheader */ + SubHeader *first() { + return reinterpret_cast(reinterpret_cast(this)+sizeof(MADT)); + } +} __attribute__((packed)); + +enum class AddressSpace : uint8_t { + MEMORY = 0x0, + IO = 0x1, +}; + +/*! \brief ACPI address format + * + * The ACPI standard defines its own address format that is able to handle addresses both in memory address space, + * as well as IO-port address space. + */ +struct Address { + AddressSpace address_space; + uint8_t register_bit_width; + uint8_t register_bit_offset; + uint8_t reserved; + uint64_t address; +} __attribute__((packed)); + +// Multiple APIC Definition Structure +namespace MADS { +enum Type { + Type_LAPIC = 0, + Type_IOAPIC = 1, + Type_Interrupt_Source_Override = 2, + Type_LAPIC_Address_Override = 5, +}; + +/*! \brief Processor Local APIC (LAPIC) Structure + * + * Represents a physical processor along with its local interrupt controller. + * The MADT contains a LAPIC structure for every processor available in the system. + * + * \see [ACPI-Specification 5.2.12.2; Processor Local APIC Structure](acpi.pdf#page=195) + */ +struct LAPIC : SubHeader { + uint8_t acpi_processor_id; + uint8_t apic_id; + uint32_t flags_enabled : 1, + flags_reserved : 31; /* must be 0 */ +} __attribute__((packed)); + +/*! \brief I/O APIC Structure + * + * Represents an I/O-APIC. 
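A sketch of how these helper methods are meant to be used, mirroring the traversal in `machine/apic.cc`: fetch the MADT via `ACPI::get()` and walk its substructures with `first()`/`next()` until `end()` is reached (this assumes `ACPI::init()` has already run successfully).

```cpp
#include "machine/acpi.h"

// Count the enabled processor cores listed in the MADT (sketch).
unsigned count_enabled_lapics() {
    ACPI::MADT *madt = static_cast<ACPI::MADT *>(ACPI::get('A', 'P', 'I', 'C'));
    if (madt == nullptr) {
        return 0;
    }

    unsigned cores = 0;
    for (ACPI::SubHeader *sub = madt->first(); sub < madt->end(); sub = sub->next()) {
        if (sub->type == ACPI::MADS::Type_LAPIC &&
            static_cast<ACPI::MADS::LAPIC *>(sub)->flags_enabled != 0) {
            cores++;
        }
    }
    return cores;
}
```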
+ * The MADT contains an IOAPIC structure for every I/O APIC present in the system. + * + * \see [ACPI-Specification 5.2.12.3; I/O APIC Structure](acpi.pdf#page=196) + */ + +struct IOAPIC : SubHeader { + uint8_t ioapic_id; + uint8_t reserved; + uint32_t ioapic_address; + uint32_t global_system_interrupt_base; +} __attribute__((packed)); + +/*! \brief Interrupt Source Override Structure + * + * Is required to describe differences between the IA-PC standard interrupt definition and the actual + * hardware implementation. + * + * \see [ACPI-Specification 5.2.12.5; Interrupt Source Override Structure](acpi.pdf#page=197) + */ +struct Interrupt_Source_Override : SubHeader { + uint8_t bus; + uint8_t source; + uint32_t global_system_interrupt; + uint16_t flags_polarity : 2, + flags_trigger_mode : 2, + flags_reserved : 12; /* must be 0 */ +} __attribute__((packed)); + +/*! \brief Local APIC Address Override Structure + * + * Support for 64-bit systems is achieved by replacing the 32-bit physical LAPIC address stored in the MADT + * with the corresponding 64-bit address. + * + * \see [ACPI-Specification 5.2.12.8; Local APIC Address Override Structure](acpi.pdf#page=199) + */ + +struct LAPIC_Address_Override : SubHeader { + uint16_t reserved; + union { + uint64_t lapic_address; + struct { + uint32_t lapic_address_low; + uint32_t lapic_address_high; + } __attribute__((packed)); + }; +} __attribute__((packed)); + +} // namespace MADS + +/*! \brief Initialize the ACPI description table + * + * Searches physical memory ranges o 16-byte boundaries for a valid Root System Description Pointer (RSDP) + * structure signature and checksum. + * If present, the superseding Extended System Description Table (XSDT) is used. + * + * \see [ACPI-Specification 5.2.5 Root System Description Pointer (RSDP)](acpi.pdf#page=160) + * \see [ACPI-Specification 5.2.8 Extended System Description Table (XSDT)](acpi.pdf#page=168) + */ +bool init(); + +/*! \brief Number of entries in the description table + */ +unsigned count(); + +/*! \brief Get entry of description table by index + * + * \param num index in description table + * \return Pointer to corresponding entry or `nullptr` if not available + */ +SDTH *get(unsigned num); + +/*! \brief Get entry of description table by four character identifier + * + * \param a first character of identifier + * \param b second character of identifier + * \param c third character of identifier + * \param d forth and last character of identifier + * \return Pointer to corresponding entry or `nullptr` if not available + */ +SDTH *get(char a, char b, char c, char d); + +/*! 
\brief Retrieve the revision from the Root System Description Pointer (RSDP) + */ +int revision(); + +} // namespace ACPI diff --git a/machine/apic.cc b/machine/apic.cc new file mode 100644 index 0000000..c034426 --- /dev/null +++ b/machine/apic.cc @@ -0,0 +1,143 @@ +#include "machine/apic.h" +#include "machine/acpi.h" +#include "machine/core.h" +#include "machine/cmos.h" +#include "machine/ioport.h" +#include "machine/lapic.h" +#include "machine/lapic_registers.h" +#include "utils/string.h" +#include "debug/assert.h" +#include "debug/output.h" + +namespace APIC { + +static struct { + uint32_t id; + uintptr_t address; + uint32_t interrupt_base; +} ioapic; + +static uint8_t slot_map[16]; + +static uint8_t lapic_id[Core::MAX]; +static unsigned lapics = 0; + +bool init() { + // get Multiple APIC Definition Table (MADT) from ACPI + ACPI::MADT *madt = static_cast(ACPI::get('A', 'P', 'I', 'C')); + if(madt == 0) { + DBG_VERBOSE << "ERROR: no MADT found in ACPI" << endl; + return false; + } + + // read the local APIC address + LAPIC::base_address = static_cast(madt->local_apic_address); + DBG_VERBOSE << "LAPIC Address " + << reinterpret_cast(static_cast(madt->local_apic_address)) << endl; + + // PC/AT compatibility mode + if (madt->flags_pcat_compat != 0) { + // The APIC operating mode is set to compatible PIC mode - we have to change it. + DBG_VERBOSE << "PIC comp mode, disabling PICs." << endl; + + // Select Interrupt Mode Control Register (IMCR) + // (this register will only exist if hardware supports the PIC mode) + IOPort reg(0x22); + reg.outb(0x70); + // disable PIC mode, use APIC + IOPort imcr(0x23); + imcr.outb(0x01); + } + + // Set default mapping of external interrupt slots (might be overwritten below) + for (unsigned i = 0; i < sizeof(slot_map)/sizeof(slot_map[0]); i++) { + slot_map[i] = i; + } + + // Initialize invalid lapic_ids + for (unsigned i = 0; i < Core::MAX; i++) { + lapic_id[i] = INVALID_ID; + } + + // reset numbers, store apic data into arrays + for (ACPI::SubHeader *mads = madt->first(); mads < madt->end(); mads = mads->next()) { + switch(mads->type) { + case ACPI::MADS::Type_LAPIC: + { + ACPI::MADS::LAPIC* mads_lapic = static_cast(mads); + if (mads_lapic->flags_enabled == 0) { + DBG_VERBOSE << "Detected disabled LAPIC with ID " << static_cast(mads_lapic->apic_id) << endl; + } else if (lapics >= Core::MAX) { + DBG_VERBOSE << "Got more LAPICs than Core::MAX" << endl; + } else if (mads_lapic->apic_id == INVALID_ID) { + DBG_VERBOSE << "Got invalid APIC ID" << endl; + } else { + DBG_VERBOSE << "Detected LAPIC with ID " << static_cast(mads_lapic->apic_id) << endl; + lapic_id[lapics++] = mads_lapic->apic_id; + } + break; + } + case ACPI::MADS::Type_IOAPIC: + { + ACPI::MADS::IOAPIC* mads_ioapic = static_cast(mads); + DBG_VERBOSE << "Detected IO APIC with ID " << static_cast(mads_ioapic->ioapic_id) << " / Base " + << reinterpret_cast(static_cast(mads_ioapic->global_system_interrupt_base)) + << endl; + if (mads_ioapic->global_system_interrupt_base > 23) { + DBG_VERBOSE << "Ignoring IOAPIC since we currently only support one." 
<< endl; + } else { + ioapic.id = mads_ioapic->ioapic_id; + ioapic.address = static_cast(mads_ioapic->ioapic_address); + ioapic.interrupt_base = mads_ioapic->global_system_interrupt_base; + } + break; + } + case ACPI::MADS::Type_Interrupt_Source_Override: + { + ACPI::MADS::Interrupt_Source_Override* mads_iso = static_cast(mads); + if (mads_iso->bus == 0) { + DBG_VERBOSE << "Overriding Interrupt Source " << static_cast(mads_iso->source) + << " with " << mads_iso->global_system_interrupt << endl; + if (mads_iso->source < sizeof(slot_map)/sizeof(slot_map[0])) { + slot_map[mads_iso->source] = mads_iso->global_system_interrupt; + } + } else { + DBG_VERBOSE << "Override for bus " << mads_iso->bus << " != ISA. Does not conform to ACPI." << endl; + } + break; + } + case ACPI::MADS::Type_LAPIC_Address_Override: + { + ACPI::MADS::LAPIC_Address_Override* mads_lao = static_cast(mads); + LAPIC::base_address = static_cast(mads_lao->lapic_address_low); + DBG_VERBOSE << "Overriding LAPIC address with " + << reinterpret_cast(static_cast(mads_lao->lapic_address)) << endl; + break; + } + } + } + return true; +} + +uint8_t getIOAPICSlot(APIC::Device device) { + return slot_map[device]; +} + +uintptr_t getIOAPICAddress() { + return ioapic.address; +} + +uint8_t getIOAPICID() { + return ioapic.id; +} + +uint8_t getLogicalAPICID(uint8_t core) { + return core < Core::MAX ? (1 << core) : 0; +} + +uint8_t getLAPICID(uint8_t core) { + assert(core < Core::MAX); + return lapic_id[core]; +} + +} // namespace APIC diff --git a/machine/apic.h b/machine/apic.h new file mode 100644 index 0000000..c21278a --- /dev/null +++ b/machine/apic.h @@ -0,0 +1,80 @@ +/*! \file + * \brief Gather system information from the \ref ACPI about the \ref APIC "Advanced Programmable Interrupt Controller (APIC)" + */ + +#pragma once + +#include "types.h" + +/*! \brief Information about the (extended) Advanced Programmable Interrupt Controller + */ +namespace APIC { + /*! \brief Historic order of interrupt lines (PIC) + */ + enum Device { + TIMER = 0, ///< Programmable Interrupt Timer (\ref PIT) + KEYBOARD = 1, ///< Keyboard + COM1 = 4, ///< First serial interface + COM2 = 3, ///< Second serial interface + COM3 = 4, ///< Third serial interface (shared with COM1) + COM4 = 3, ///< Forth serial interface (shared with COM2) + FLOPPY = 6, ///< Floppy device + LPT1 = 7, ///< Printer + REALTIMECLOCK = 8, ///< Real time clock + PS2MOUSE = 12, ///< Mouse + IDE1 = 14, ///< First hard disk + IDE2 = 15 ///< Second hard disk + }; + + /*! \brief Invalid APIC ID + * + * The highest address is reserved according to xAPIC specification + */ + const uint8_t INVALID_ID = 0xff; + + /*! \brief Executes system detection + * + * Searches and evaluates the APIC entries in the \ref ACPI table. + * This function recognizes a possibly existing multicore system. + * After successful detection, the number of available CPUs (which is equal + * to the number of \ref LAPIC "local APICs") ) can be queried + * using the method \ref Core::count(). + * + * \note Called by \ref kernel_init() on BSP + * + * \return `true` if detection of the APIC entries was successful + */ + bool init(); + + /*! \brief Queries the physical I/O-APIC address determined during system boot + * + * \return Base address of the (first & only supported) I/O APIC + */ + uintptr_t getIOAPICAddress(); + + /*! \brief Queries of ID of the I/O-APIC determined during system boot + * + * \return Identification of the (first & only supported) I/O APIC + */ + uint8_t getIOAPICID(); + + /*! 
\brief Returns the pin number the \p device is connected to. + */ + uint8_t getIOAPICSlot(APIC::Device device); + + /*! \brief Returns the logical ID of the Local APIC passed for \a core. + * + * The LAPIC's logical ID is set (by StuBS) during boot such that exactly one bit is set per CPU core. + * For core 0, bit 0 is set in its ID, while core 1 has bit 1 set, etc. + * + * \param core The queried CPU core + */ + uint8_t getLogicalAPICID(uint8_t core); + + /*! \brief Get the Local APIC ID of a CPU + * \param core Query CPU core number + * \return LAPIC ID of CPU or INVALID_ID if invalid CPU ID + */ + uint8_t getLAPICID(uint8_t core); + +} // namespace APIC diff --git a/machine/cmos.cc b/machine/cmos.cc new file mode 100644 index 0000000..3045b90 --- /dev/null +++ b/machine/cmos.cc @@ -0,0 +1,61 @@ +#include "machine/cmos.h" + +#include "machine/core.h" +#include "machine/ioport.h" + +namespace CMOS { +static IOPort address(0x70); +static IOPort data(0x71); + +namespace NMI { +static const uint8_t mask = 0x80; +// Cache NMI to speed things up +static bool disabled = false; + +void enable() { + bool status = Core::Interrupt::disable(); + uint8_t value = address.inb(); + value &= ~mask; + address.outb(value); + Core::Interrupt::restore(status); + disabled = false; +} + +void disable() { + bool status = Core::Interrupt::disable(); + uint8_t value = address.inb(); + value |= mask; + address.outb(value); + Core::Interrupt::restore(status); + disabled = true; +} + +bool isEnabled() { + disabled = (address.inb() & mask) != 0; + return !disabled; +} +} // namespace NMI + +static void setAddress(enum Register reg) { + uint8_t value = reg; + // The highest bit controls the Non Maskable Interrupt + // so we don't want to accidentally change it. + if (NMI::disabled) { + value |= NMI::mask; + } else { + value &= ~NMI::mask; + } + address.outb(value); +} + +uint8_t read(enum Register reg) { + setAddress(reg); + return data.inb(); +} + +void write(enum Register reg, uint8_t value) { + setAddress(reg); + data.outb(value); +} + +} // namespace CMOS diff --git a/machine/cmos.h b/machine/cmos.h new file mode 100644 index 0000000..3ec8433 --- /dev/null +++ b/machine/cmos.h @@ -0,0 +1,41 @@ + /*! \file + * \brief Controlling the \ref CMOS "complementary metal oxide semiconductor (CMOS)" + */ + +#pragma once + +#include "types.h" + +/*! \brief CMOS + * \ingroup CMOS + */ +namespace CMOS { + +enum Register { + REG_SECOND = 0x0, ///< RTC + REG_ALARM_SECOND = 0x1, ///< RTC + REG_MINUTE = 0x2, ///< RTC + REG_ALARM_MINUTE = 0x3, ///< RTC + REG_HOUR = 0x4, ///< RTC + REG_ALARM_HOUR = 0x5, ///< RTC + REG_WEEKDAY = 0x6, ///< RTC + REG_DAYOFMONTH = 0x7, ///< RTC + REG_MONTH = 0x8, ///< RTC + REG_YEAR = 0x9, ///< RTC + REG_STATUS_A = 0xa, ///< RTC + REG_STATUS_B = 0xb, ///< RTC + REG_STATUS_C = 0xc, ///< RTC + REG_STATUS_D = 0xd, ///< RTC + REG_STATUS_DIAGNOSE = 0xe, + REG_STATUS_SHUTDOWN = 0xf +}; + +uint8_t read(enum Register reg); +void write(enum Register reg, uint8_t value); + +namespace NMI { +void enable(); +void disable(); +bool isEnabled(); +} // namespace NMI +} // namespace CMOS diff --git a/machine/core.cc b/machine/core.cc new file mode 100644 index 0000000..2bec0f1 --- /dev/null +++ b/machine/core.cc @@ -0,0 +1,75 @@ +#include "machine/core.h" +#include "machine/apic.h" +#include "machine/lapic.h" + +/*! \brief Initial size of CPU core stacks + * + * Used during startup in `boot/startup.asm` + */ +extern "C" const unsigned long CPU_CORE_STACK_SIZE = 4096; + +/*! 
\brief Reserved memory for CPU core stacks + */ +alignas(16) static unsigned char cpu_core_stack[Core::MAX * CPU_CORE_STACK_SIZE]; + +/*! \brief Pointer to stack memory + * + * Incremented during startup of each core (bootstrap and application processors) in `boot/startup.asm` + */ +unsigned char * cpu_core_stack_pointer = cpu_core_stack; + +namespace Core { + +static unsigned cores = 0; ///< Number of available CPU cores +static unsigned core_id[255]; ///< Lookup table for CPU core IDs with LAPIC ID as index + +static unsigned online_cores = 0; ///< Number of currently online CPU cores +static bool online_core[Core::MAX]; ///< Lookup table for online CPU cores with CPU core ID as index + +void init() { + // Increment number of online CPU cores + if (__atomic_fetch_add(&online_cores, 1, __ATOMIC_RELAXED) == 0) { + // Fill Lookup table + for (unsigned i = 0; i < Core::MAX; i++) { + uint8_t lapic_id = APIC::getLAPICID(i); + if (lapic_id < APIC::INVALID_ID) { // ignore invalid LAPICs + core_id[lapic_id] = i; + cores++; + } + } + } + + // Get CPU ID + uint8_t cpu = getID(); + + // initialize local APIC with logical APIC ID + LAPIC::init(APIC::getLogicalAPICID(cpu)); + + // set current CPU online + online_core[cpu] = true; + +} + +void exit() { + // CPU core offline + online_core[getID()] = false; + __atomic_fetch_sub(&online_cores, 1, __ATOMIC_RELAXED); +} + +unsigned getID() { + return core_id[LAPIC::getID()]; +} + +unsigned count() { + return cores; +} + +unsigned countOnline() { + return online_cores; +} + +bool isOnline(uint8_t core_id) { + return core_id > Core::MAX ? false : online_core[core_id]; +} + +} // namespace Core diff --git a/machine/core.h b/machine/core.h new file mode 100644 index 0000000..3790b94 --- /dev/null +++ b/machine/core.h @@ -0,0 +1,112 @@ +/*! \file + * \brief Access to internals of a CPU \ref Core + */ + +/*! \defgroup sync CPU Synchronization + * + * The synchronization module houses functions useful for orchestrating multiple processors and their activities. + * Synchronisation, in this case, means handling the resource contention between multiple participants, running on + * either the same or different cores. + */ + +#pragma once + +#include "types.h" +#include "machine/core_cr.h" +#include "machine/core_interrupt.h" +#include "machine/core_msr.h" + +/*! \brief Implements an abstraction for CPU internals. + * + * These internals include functions to \ref Core::Interrupt "allow or deny interrupts", + * access \ref Core::CR "control registers". + */ +namespace Core { + +/*! \brief Maximum number of supported CPUs + */ +const unsigned MAX = 8; + +/*! \brief Get the ID of the current CPU core + * using \ref LAPIC::getID() with an internal lookup table. + * + * \return ID of current Core (a number between 0 and \ref Core::MAX) + */ +unsigned getID(); + +/*! \brief Initialize this CPU core + * + * Mark this core as *online* and setup the cores \ref LAPIC by assigning it a + * unique \ref APIC::getLogicalAPICID() "logical APIC ID" + * + * \note Should only be called from \ref kernel_init() during startup. + */ +void init(); + +/*! \brief Deinitialize this CPU core + * + * Mark this Core as *offline* + * + * \note Should only be called from \ref kernel_init() after returning from `main()` or `main_ap()`. + */ +void exit(); + +/*! \brief Get number of available CPU cores + * + * \return total number of cores + */ +unsigned count(); + +/*! 
\brief Get number of successfully started (and currently active) CPU cores + * + * \return total number of online cores + */ +unsigned countOnline(); + +/*! \brief Check if CPU core is currently active + * \param core_id ID of the CPU core + * \return `true` if successfully started and is currently active + */ +bool isOnline(uint8_t core_id); + +/*! \brief Gives the core a hint that it is executing a spinloop and should sleep "shortly" + * + * Improves the over-all performance when executing a spinloop by waiting a short moment reduce + * the load on the memory. + * + * \see [ISDMv2, Chapter 4. PAUSE - Spin Loop Hint](intel_manual_vol2.pdf#page=887) + */ +inline void pause() { + asm volatile("pause\n\t" : : : "memory"); +} + +/*! \brief Halt the CPU core until the next interrupt. + * + * Halts the current CPU core such that it will wake up on the next interrupt. Internally, this function first enables + * the interrupts via `sti` and then halts the core using `hlt`. Halted cores can only be woken by interrupts. + * The effect of `sti` is delayed by one instruction, making the sequence `sti hlt` atomic (if interrupts were + * disabled previously). + * + * \see [ISDMv2, Chapter 4. STI - Set Interrupt Flag](intel_manual_vol2.pdf#page=1297) + * \see [ISDMv2, Chapter 3. HLT - Halt](intel_manual_vol2.pdf#page=539) + */ +inline void idle() { + asm volatile("sti\n\t hlt\n\t" : : : "memory"); +} + +/*! \brief Permanently halts the core. + * + * Permanently halts the current CPU core. Internally, this function first disables the interrupts via `cli` and + * then halts the CPU core using `hlt`. As halted CPU cores can only be woken by interrupts, it is guaranteed that + * this core will be halted until the next reboot. The execution of die never returns. + * On multicore systems, only the executing CPU core will be halted permanently, other cores will continue execution. + * + * \see [ISDMv2, Chapter 3. CLI - Clear Interrupt Flag](intel_manual_vol2.pdf#page=245) + * \see [ISDMv2, Chapter 3. HLT - Halt](intel_manual_vol2.pdf#page=539) + */ +[[noreturn]] inline void die() { + while (true) { + asm volatile("cli\n\t hlt\n\t" : : : "memory"); + } +} +} // namespace Core diff --git a/machine/core_cr.h b/machine/core_cr.h new file mode 100644 index 0000000..bdea098 --- /dev/null +++ b/machine/core_cr.h @@ -0,0 +1,81 @@ +/*! \file + * \brief Access to \ref Core::CR "Control Register" of a \ref Core "CPU core" + */ + +#pragma once + +#include "types.h" + +namespace Core { +/*! \brief Control Register 0 + * + * \see [ISDMv3, 2.5 Control Registers](intel_manual_vol3.pdf#page=74) + */ +enum CR0 : uintptr_t { + CR0_PE = 1U << 0, ///< Protected Mode enabled + CR0_MP = 1U << 1, ///< Monitor co-processor + CR0_EM = 1U << 2, ///< Emulation (no x87 floating-point unit present) + CR0_TS = 1U << 3, ///< Task switched + CR0_ET = 1U << 4, ///< Extension type + CR0_NE = 1U << 5, ///< Numeric error + CR0_WP = 1U << 16, ///< Write protect + CR0_AM = 1U << 18, ///< Alignment mask + CR0_NW = 1U << 29, ///< Not-write through caching + CR0_CD = 1U << 30, ///< Cache disable + CR0_PG = 1U << 31, ///< Paging +}; + +/*! 
\brief Control Register 4 + * + * \see [ISDMv3, 2.5 Control Registers](intel_manual_vol3.pdf#page=77) + */ +enum CR4 : uintptr_t { + CR4_VME = 1U << 0, ///< Virtual 8086 Mode Extensions + CR4_PVI = 1U << 1, ///< Protected-mode Virtual Interrupts + CR4_TSD = 1U << 2, ///< Time Stamp Disable + CR4_DE = 1U << 3, ///< Debugging Extensions + CR4_PSE = 1U << 4, ///< Page Size Extension + CR4_PAE = 1U << 5, ///< Physical Address Extension + CR4_MCE = 1U << 6, ///< Machine Check Exception + CR4_PGE = 1U << 7, ///< Page Global Enabled + CR4_PCE = 1U << 8, ///< Performance-Monitoring Counter enable + CR4_OSFXSR = 1U << 9, ///< Operating system support for FXSAVE and FXRSTOR instructions + CR4_OSXMMEXCPT = 1U << 10, ///< Operating System Support for Unmasked SIMD Floating-Point Exceptions + CR4_UMIP = 1U << 11, ///< User-Mode Instruction Prevention + CR4_VMXE = 1U << 13, ///< Virtual Machine Extensions Enable + CR4_SMXE = 1U << 14, ///< Safer Mode Extensions Enable + CR4_FSGSBASE = 1U << 16, ///< Enables the instructions RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE. + CR4_PCIDE = 1U << 17, ///< PCID Enable + CR4_OSXSAVE = 1U << 18, ///< XSAVE and Processor Extended States Enable + CR4_SMEP = 1U << 20, ///< Supervisor Mode Execution Protection Enable + CR4_SMAP = 1U << 21, ///< Supervisor Mode Access Prevention Enable + CR4_PKE = 1U << 22, ///< Protection Key Enable +}; + +/*! \brief Access to the Control Register + * + * \see [ISDMv3, 2.5 Control Registers](intel_manual_vol3.pdf#page=73) + * \tparam id Control Register to access + */ +template +class CR { + public: + /*! \brief Read the value of the current Control Register + * + * \return Value stored in the CR + */ + inline static uintptr_t read(void) { + uintptr_t val; + asm volatile("mov %%cr%c1, %0" : "=r"(val) : "n"(id) : "memory"); + return val; + } + + /*! \brief Write a value into the current Control Register + * + * \param value Value to write into the CR + */ + inline static void write(uintptr_t value) { + asm volatile("mov %0, %%cr%c1" : : "r"(value), "n"(id) : "memory"); + } +}; +} // namespace Core diff --git a/machine/core_interrupt.h b/machine/core_interrupt.h new file mode 100644 index 0000000..dbe9c2b --- /dev/null +++ b/machine/core_interrupt.h @@ -0,0 +1,130 @@ +/*! \file + * \brief \ref Core::Interrupt "Interrupt control" and \ref Core::Interrupt::Vector "interrupt vector list" + */ + +#pragma once + +#include "types.h" + +namespace Core { +/*! \brief Exception and Interrupt control + * + * \see [ISDMv3, Chapter 6 Interrupt and Exception Handling](intel_manual_vol3.pdf#page=185) + */ +namespace Interrupt { + +/*! \brief Bit in `FLAGS` register corresponding to the current interrupt state + */ +const uintptr_t FLAG_ENABLE = 1 << 9; + +/*! \brief List of used interrupt vectors. + * + * The exception vectors from `0` to `31` are reserved for traps, faults and aborts. + * Their behavior is different for each exception, some push an *error code*, + * some are not recoverable. + * + * The vectors from `32` to `255` are user defined interrupts. 
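A usage sketch for the `Core::CR` template above, assuming its template parameter selects the control register number (as the `\tparam` documentation states): query CR0/CR4 flags and perform a read-modify-write on CR4.

```cpp
#include "machine/core.h"   // pulls in machine/core_cr.h

bool paging_and_pae_active() {
    bool paging = (Core::CR<0>::read() & Core::CR0_PG) != 0;
    bool pae    = (Core::CR<4>::read() & Core::CR4_PAE) != 0;
    return paging && pae;
}

void enable_global_pages() {
    // Read-modify-write of CR4 to set the Page Global Enable bit
    Core::CR<4>::write(Core::CR<4>::read() | Core::CR4_PGE);
}
```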
+ * + * \see [ISDMv3, 6.15 Exception and Interrupt Reference](intel_manual_vol3.pdf#page=203) + */ +enum Vector { + // Predefined Exceptions + DIVISON_BY_ZERO = 0, ///< Divide-by-zero Error (at a `DIV`/`IDIV` instruction) + DEBUG = 1, ///< Debug exception + NON_MASKABLE_INTERRUPT = 2, ///< Non Maskable Interrupt + BREAKPOINT = 3, ///< Breakpoint exception (used for debugging) + OVERFLOW = 4, ///< Overflow exception (at `INTO` instruction) + BOUND_RANGE_EXCEEDED = 5, ///< Bound Range Exceeded (at `BOUND` instruction) + INVALID_OPCODE = 6, ///< Opcode at Instruction Pointer is invalid (you probably shouldn't be here) + DEVICE_NOT_AVAILABLE = 7, ///< FPU "FPU/MMX/SSE" instruction but corresponding extension not activate + DOUBLE_FAULT = 8, ///< Exception occurred while trying to call exception/interrupt handler + // Coprocessor Segment Overrun (Legacy) + INVALID_TSS = 10, ///< Invalid Task State Segment selector (see error code for index) + SEGMENT_NOT_PRESENT = 11, ///< Segment not available (see error code for selector index) + STACK_SEGMENT_FAULT = 12, ///< Stack segment not available or invalid (see error code for selector index) + GENERAL_PROTECTION_FAULT = 13, ///< Operation not allowed (see error code for selector index) + PAGE_FAULT = 14, ///< Operation on Page (r/w/x) not allowed for current privilege (error code + `cr2`) + // reserved (15) + FLOATING_POINT_EXCEPTION = 16, ///< x87 FPU error (at `WAIT`/`FWAIT`), accidentally \ref Core::CR0_NE set? + ALIGNMENT_CHECK = 17, ///< Unaligned memory access in userspace (Exception activated by \ref Core::CR0_AM) + MACHINE_CHECK = 18, ///< Model specific exception + SIMD_FP_EXCEPTION = 19, ///< SSE/MMX error (if \ref Core::CR4_OSXMMEXCPT activated) + // reserved (20 - 31) + EXCEPTIONS = 32, ///< Number of exceptions + + // Interrupts + TIMER = 32, ///< Periodic CPU local \ref LAPIC::Timer interrupt + KEYBOARD = 33, ///< Keyboard interrupt (key press / release) + GDB = 35, ///< Inter-processor interrupt to stop other CPUs for debugging in \ref GDB + ASSASSIN = 100, ///< Inter-processor interrupt to immediately stop threads running on other CPUs + WAKEUP = 101, ///< Inter-processor interrupt to WakeUp sleeping CPUs + + VECTORS = 256 ///< Number of interrupt vectors +}; + +/*! \brief Check if interrupts are enabled on this CPU + * + * This is done by pushing the `FLAGS` register onto stack, + * reading it into a register and checking the corresponding bit. + * + * \return `true` if enabled, `false` if disabled + */ +inline bool isEnabled() { + uintptr_t out; + asm volatile ( + "pushf\n\t" + "pop %0\n\t" + : "=r"(out) + : + : "memory" + ); + return (out & FLAG_ENABLE) != 0; +} + +/*! \brief Allow interrupts + * + * Enables interrupt handling by executing the instruction `sti`. + * Since this instruction is delayed by one cycle, an subsequent `nop` is executed + * (to ensure deterministic behavior, independent from the compiler generated code) + * + * A pending interrupt (i.e., those arriving while interrupts were disabled) will + * be delivered after re-enabling interrupts. + * + * \see [ISDMv2, Chapter 4. STI - Set Interrupt Flag](intel_manual_vol2.pdf#page=1297) + */ +inline void enable() { + asm volatile("sti\n\t nop\n\t" : : : "memory"); +} + +/*! \brief Forbid interrupts + * + * Prevents interrupt handling by executing the instruction `cli`. + * Will return the previous interrupt state. + * \return `true` if interrupts were enabled at the time of executing this function, + * `false` if they were already disabled. 
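The intended usage pattern for `disable()`/`restore()` described above (and already used in `machine/cmos.cc`): save the previous interrupt state, do the critical work, then restore that state instead of unconditionally re-enabling, so nested critical sections behave correctly.

```cpp
#include "machine/core_interrupt.h"

// Sketch of a short critical section on the current core.
void critical_section_example() {
    bool was_enabled = Core::Interrupt::disable();   // returns the previous state

    // ... access data that must not be interrupted on this core ...

    Core::Interrupt::restore(was_enabled);           // re-enables only if previously enabled
}
```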
+ * + * \see [ISDMv2, Chapter 3. CLI - Clear Interrupt Flag](intel_manual_vol2.pdf#page=245) + */ +inline bool disable() { + bool enabled = isEnabled(); + asm volatile ("cli\n\t" : : : "memory"); + + return enabled; +} + +/*! \brief Restore interrupt + * + * Restore the interrupt state to the state prior to calling \ref disable() by using its return value. + * + * \note This function will never disable interrupts, even if val is false! + * This function is designed to allow nested disabling and restoring of the interrupt state. + * + * \param val if set to `true`, interrupts will be enabled; nothing will happen on false. + */ +inline void restore(bool val) { + if (val) { + enable(); + } +} +} // namespace Interrupt +} // namespace Core diff --git a/machine/core_msr.h b/machine/core_msr.h new file mode 100644 index 0000000..91a6771 --- /dev/null +++ b/machine/core_msr.h @@ -0,0 +1,91 @@ +/*! \file + * \brief \ref Core::MSRs "Identifiers" for \ref Core::MSR "Model-Specific Register" + */ + +#pragma once + +namespace Core { +/*! \brief Model-Specific Register Identifiers + * + * Selection of useful identifiers. + * + * \see [ISDMv4](intel_manual_vol4.pdf) + */ +enum MSRs : uint32_t { + MSR_PLATFORM_INFO = 0xceU, ///< Platform information including bus frequency (Intel) + MSR_TSC_DEADLINE = 0x6e0U, ///< Register for \ref LAPIC::Timer Deadline mode + // Fast system calls + MSR_EFER = 0xC0000080U, ///< Extended Feature Enable Register, \see Core::MSR_EFER + MSR_STAR = 0xC0000081U, ///< eip (protected mode), ring 0 and 3 segment bases + MSR_LSTAR = 0xC0000082U, ///< rip (long mode) + MSR_SFMASK = 0xC0000084U, ///< lower 32 bit: flag mask, if bit is set corresponding rflag is cleared through syscall + + // Core local variables + MSR_FS_BASE = 0xC0000100U, + MSR_GS_BASE = 0xC0000101U, ///< Current GS base pointer + MSR_SHADOW_GS_BASE = 0xC0000102U, ///< Usually called `MSR_KERNEL_GS_BASE` but this is misleading +}; + +/* \brief Important bits in Extended Feature Enable Register (EFER) + * + * \see [ISDMv3, 2.2.1 Extended Feature Enable Register](intel_manual_vol3.pdf#page=69) + * \see [AAPMv2, 3.1.7 Extended Feature Enable Register](amd64_manual_vol2.pdf#page=107) + */ +enum MSR_EFER : uintptr_t { + MSR_EFER_SCE = 1U << 0, ///< System Call Extensions + MSR_EFER_LME = 1U << 8, ///< Long mode enable + MSR_EFER_LMA = 1U << 10, ///< Long mode active + MSR_EFER_NXE = 1U << 11, ///< No-Execute Enable + MSR_EFER_SVME = 1U << 12, ///< Secure Virtual Machine Enable + MSR_EFER_LMSLE = 1U << 13, ///< Long Mode Segment Limit Enable + MSR_EFER_FFXSR = 1U << 14, ///< Fast `FXSAVE`/`FXRSTOR` instruction + MSR_EFER_TCE = 1U << 15, ///< Translation Cache Extension +}; + +/*! \brief Access to the Model-Specific Register (MSR) + * + * \see [ISDMv3, 9.4 Model-Specific Registers (MSRs)](intel_manual_vol3.pdf#page=319) + * \see [ISDMv4](intel_manual_vol4.pdf) + * \tparam id ID of the Model-Specific Register to access + */ +template +class MSR { + /*! \brief Helper to access low and high bits of a 64 bit value + * \internal + */ + union uint64_parts { + struct { + uint32_t low; + uint32_t high; + } __attribute__((packed)); + uint64_t value; + + explicit uint64_parts(uint64_t value = 0) : value(value) {} + }; + + public: + /*! \brief Read the value of the current MSR + * + * \return Value stored in the MSR + * + * \see [ISDMv2, Chapter 4. 
RDMSR - Read from Model Specific Register](intel_manual_vol2.pdf#page=1186) + */ + static inline uint64_t read() { + uint64_parts p; + asm volatile ("rdmsr \n\t" : "=a"(p.low), "=d"(p.high) : "c"(id)); + return p.value; + } + + /*! \brief Write a value into the current MSR + * + * \param value Value to write into the MSR + * + * \see [ISDMv2, Chapter 5. WRMSR - Write to Model Specific Register](intel_manual_vol2.pdf#page=1912) + */ + static inline void write(uint64_t value) { + uint64_parts p(value); + asm volatile ("wrmsr \n\t" : : "c"(id), "a"(p.low), "d"(p.high)); + } +}; + +} // namespace Core diff --git a/machine/gdt.cc b/machine/gdt.cc new file mode 100644 index 0000000..46a65ef --- /dev/null +++ b/machine/gdt.cc @@ -0,0 +1,53 @@ +#include "machine/gdt.h" +#include "machine/core.h" +#include "debug/assert.h" +#include "debug/output.h" + +namespace GDT { + +// The static 32-bit Global Descriptor Table (GDT) +alignas(16) static SegmentDescriptor protected_mode[] = { + // NULL descriptor + {}, + + // Global code segment von 0-4GB + { /* base = */ 0x0, + /* limit = */ 0xFFFFFFFF, + /* code = */ true, + /* ring = */ 0, + /* size = */ SIZE_32BIT }, + + // Global data segment von 0-4GB + { /* base = */ 0x0, + /* limit = */ 0xFFFFFFFF, + /* code = */ false, + /* ring = */ 0, + /* size = */ SIZE_32BIT }, + +}; +extern "C" constexpr Pointer gdt_protected_mode_pointer(protected_mode); + +// The static 64-bit Global Descriptor Table (GDT) +// \see [ISDMv3 3.2.4 Segmentation in IA-32e Mode](intel_manual_vol3.pdf#page=91) +alignas(16) static SegmentDescriptor long_mode[] = { + // Null segment + {}, + + // Global code segment + { /* base = */ 0x0, + /* limit = */ 0x0, // ignored + /* code = */ true, + /* ring = */ 0, + /* size = */ SIZE_64BIT_CODE }, + + // Global data segment + { /* base = */ 0x0, + /* limit = */ 0x0, // ignored + /* code = */ false, + /* ring = */ 0, + /* size = */ SIZE_64BIT_DATA }, + +}; +extern "C" constexpr Pointer gdt_long_mode_pointer(long_mode); + +} // namespace GDT diff --git a/machine/gdt.h b/machine/gdt.h new file mode 100644 index 0000000..4b117a8 --- /dev/null +++ b/machine/gdt.h @@ -0,0 +1,203 @@ +/*! \file + * \brief The \ref GDT "Global Descriptor Table (GDT)". + */ + +#pragma once + +#include "types.h" + +/*! \brief Abstracts the GDT that, primarily, contains descriptors to memory segments. + * \ingroup memory + * + * The GDT is a table that primarily contains segment descriptors. Segment descriptors has a size of 8 Bytes and + * contains the size, position, access rights, and purpose of such a segment. + * Unlike the LDT, the GDT is shared between all processes and may contain TSS and LDT descriptors. + * For the kernel, the first entry is required to be a null descriptor and the code and data segments. + * To support user-mode processes, additional TSS, code, and data segments for ring 3 must be added. + * + * The base address and size of the GDT are written to the GDTR register during boot (via. `lgdt`). + * + * \see [ISDMv3, 2.4.1; Global Descriptor Table Register (GDTR)](intel_manual_vol3.pdf#page=72) + * \see [ISDMv3, 3.5.1; Segment Descriptor Tables](intel_manual_vol3.pdf#page=99) + */ +namespace GDT { + +enum Segments { + SEGMENT_NULL = 0, + SEGMENT_KERNEL_CODE, + SEGMENT_KERNEL_DATA, +}; + +/*! \brief Unit of the segment limit + */ +enum Granularity { + GRANULARITY_BYTES = 0, ///< Segment limit in Bytes + GRANULARITY_4KBLOCK = 1 ///< Segment limit in blocks of 4 Kilobytes +}; + +/*! 
\brief Descriptor type */ +enum DescriptorType { + DESCRIPTOR_SYSTEM = 0, ///< entry is a system segment + DESCRIPTOR_CODEDATA = 1, ///< entry is a code/data segment +}; + +/*! \brief Address width + */ +enum Size { + SIZE_16BIT = 0, ///< 16-bit (D/B = 0, L = 0) + SIZE_32BIT = 2, ///< 32-bit (D/B = 1, L = 0) + SIZE_64BIT_CODE = 1, ///< 64-bit (D/B = 0, L = 1) + SIZE_64BIT_DATA = 0, ///< 64-bit (D/B = 0, L = 0) +}; + +/*! \brief Describes the structure of segment descriptors + * + * A data structure that contains size, position, access rights, and purpose of any segment. + * Segment descriptors are used in both the GDT, as well as in LDTs. + * + * \see [ISDMv3, 3.4.5; Segment Descriptors](intel_manual_vol3.pdf#page=95) + * \see [AAPMv2, 4.7 Legacy Segment Descriptors](amd64_manual_vol2.pdf#page=132) + * \see [AAPMv2, 4.8 Long-Mode Segment Descriptors](amd64_manual_vol2.pdf#page=140) + */ +union SegmentDescriptor { + // Universally valid values (shared across all segment types) + struct { + uint64_t limit_low : 16; ///< Least-significant bits of segment size (influenced by granularity!) + uint64_t base_low : 24; ///< Least-significant bits of base address + uint64_t type : 4; ///< Meaning of those 4 bits depends on descriptor_type below + DescriptorType descriptor_type : 1; ///< Descriptor type (influences the meaning of the 3 bits above) + uint64_t privilege_level : 2; ///< Ring for this segment + bool present : 1; ///< Entry is valid iff set to `true` + uint64_t limit_high : 4; ///< Most-significant bits of segment size + bool available : 1; ///< Bit which can be used for other purposes (in software) + uint64_t custom : 2; ///< Meaning of those 2 bits relate to descriptor_type and type + Granularity granularity : 1; ///< Unit used as granularity for the segment limit + uint64_t base_high : 8; ///< most-significant bits of base address + } __attribute__((packed)); + + /*! \brief Fields specific for Code Segment + * (for debugging purposes) + * \see [ISDMv3, 3.4.5.1; Code- and Data-Segment Descriptor Types](intel_manual_vol3.pdf#page=98) + */ + struct { + uint64_t : 40; ///< Ignored (set via `limit_low` and `base_low` ) + + /* `type` field bits */ + bool code_accessed : 1; ///< If set, the code segment was used since the last reset of this value + bool readable : 1; ///< If set, the code is readable (otherwise only executable) + + /*! \brief If set, the execution of code from this segment is only allowed when running at a privilege of + * numerically less than or equal to privilege_level (i.e. the executor has the same or higher + * privileges). However, the executor's privileges remain unchanged. + * For nonconforming code segments (i.e., conforming is set to `0`), execution is allowed only if + * the privileges are equal. + * Execution will cause a GPF in case of privilege violation. + */ + bool conforming : 1; + bool code : 1; ///< Has to be set to `true` + + uint64_t : 9; ///< Ignored (set via `privilege_level` ... `available`) + + Size operation_size : 2; ///< Default address width (`custom` field bit) + + uint64_t : 0; ///< Remainder ignored (set via `base_high`) + } __attribute__((packed)); + + /*! 
\brief Fields specific for Data Segment + * (for debugging purposes) + * \see [ISDMv3, 3.4.5.1; Code- and Data-Segment Descriptor Types](intel_manual_vol3.pdf#page=98) + */ + struct { + uint64_t : 40; ///< Ignored (set via `limit_low` and `base_low`) + bool data_accessed : 1; ///< If set, the data segment was used since the last reset of this value + bool writeable : 1; ///< If set, data is writable (otherwise read only) + bool expand_down : 1; ///< Growing direction for dynamically growing segments + bool notData : 1; ///< Has to be cleared (`false`) + uint64_t : 9; ///< Ignored (set via `privilege_level` ... `available`) + uint64_t reserved : 1; ///< Reserved, always set to `0`! + + /*! \brief Size of the stack pointer (`false` = 16 bit, `true` = 32 bit) + * \warning Has a different meaning in case expand_down is set to `1`. + */ + bool big : 1; + + uint64_t : 0; ///< Remainder ignored + } __attribute__((packed)); + + uint64_t value; ///!< Merged value; useful for debugging + + /*! \brief Constructor for a specific value */ + constexpr SegmentDescriptor(uint64_t val = 0) : value(val) {} //NOLINT due to copy-initialization + + /*! \brief Constructor for a code/data GDT entry. + * \param base Base Address of segment + * \param limit Size of segment + * \param code Code or data segment + * \param ring Privilege level + * \param size Address width + */ + constexpr SegmentDescriptor(uintptr_t base, uint32_t limit, bool code, int ring, Size size) : + limit_low(limit >> (limit > 0xFFFFF ? 12 : 0) & 0xFFFF), + base_low(base & 0xFFFFFF), + type(code ? 0xA : 0x2), // code readable / non-conforming, data writeable and not expanding down + descriptor_type(DESCRIPTOR_CODEDATA), + privilege_level(ring), + present(true), + limit_high((limit > 0xFFFFF ? (limit >> 28) : (limit >> 16)) & 0xF), + available(false), + custom(size), + granularity(limit > 0xFFFFF ? GRANULARITY_4KBLOCK : GRANULARITY_BYTES), + base_high((base >> 24) & 0xFF) {} + +} __attribute__((packed)); + +static_assert(sizeof(SegmentDescriptor) == 8, "GDT::SegmentDescriptor has wrong size"); + +/*! \brief Structure that describes a GDT Pointer (aka GDT Descriptor) + * + * It contains both the length (in bytes) of the GDT (minus 1 byte) and the pointer to the GDT. + * The pointer to the GDT can be loaded using the instruction `lgdt`. + * + * \note As Intel uses little endian for representing multi-byte values, the GDT::Pointer structure can be used for + * 16, 32, and 64 bit descriptor tables: + * \verbatim + * | 16 bit | 16 bit | 16 bit | 16 bit | 16 bit | + * +--------+---------------------------------------+ + * Pointer | limit | base (up to 64 bit) | + * +--------+---------+---------+---------+---------+ + * | used for 16 bit | ignored... | + * | used for 32 bit | ignored... | + * | used for 64 bit | + * \endverbatim + * + * \see [ISDMv3, Figure 2-6; Memory Management Registers](intel_manual_vol3.pdf#page=72) + */ +struct Pointer { + uint16_t limit; //!< GDT size in bytes (minus 1 byte) + void * base; //!< GDT base address + + /*! \brief Constructor (automatic length) + * \param desc Array of GDT segment descriptors -- must be defined in the same module! + */ + template + explicit constexpr Pointer(const T (&desc)[LEN]) : limit(LEN * sizeof(T) - 1), base(const_cast(desc)) {} + + /*! \brief Constructor + * \param desc Address of the GDT segment descriptors + * \param len Number of entries + */ + constexpr Pointer(void * desc, size_t len) : limit(len * sizeof(SegmentDescriptor) - 1), base(desc) {} + + /*! 
\brief Set an address + * \note On change, `lgdt` must be executed again + * \param desc Address of the GDT segment descriptors + * \param len Number of entries + */ + void set(void * desc, size_t len) { + limit = len * sizeof(SegmentDescriptor) - 1; + base = desc; + } +} __attribute__((packed)); +static_assert(sizeof(Pointer) == 10, "GDT::Pointer has wrong size"); + +} // namespace GDT diff --git a/machine/idt.cc b/machine/idt.cc new file mode 100644 index 0000000..8415bde --- /dev/null +++ b/machine/idt.cc @@ -0,0 +1,63 @@ +#include "machine/idt.h" +#include "machine/gdt.h" +#include "machine/core_interrupt.h" + +namespace IDT { + +// Interrupt Descriptor stored in the Interrupt-Descriptor Table (IDT) +struct alignas(8) InterruptDescriptor { + uint16_t address_low; ///< lower interrupt function offset + uint16_t selector; ///< code segment selector in GDT or LDT + union { + struct { + uint8_t ist : 3; ///< IST Index (64 bit) + uint8_t : 5; ///< unused, has to be 0 + Gate type : 3; ///< gate type + GateSize size : 1; ///< gate size + uint8_t : 1; ///< unused, has to be 0 + DPL dpl : 2; ///< descriptor privilege level + uint8_t present : 1; ///< present: 1 for interrupts + } __attribute__((packed)); + uint16_t flags; + }; + uint64_t address_high : 48; ///< higher interrupt function offset + uint64_t : 0; ///< fill until aligned with 64 bit +} __attribute__((packed)); + +// Interrupt Descriptor Table, 8 Byte aligned +static struct InterruptDescriptor idt[256]; + +// Struct used for loading (the address of) the Interrupt Descriptor Table into the IDT-Register +struct Register { + uint16_t limit; // Address of the last valid byte (relative to base) + struct InterruptDescriptor * base; + explicit Register(uint8_t max = 255) { + limit = (max + static_cast(1)) * sizeof(InterruptDescriptor) - 1; + base = idt; + } +} __attribute__((packed)); + +static_assert(sizeof(InterruptDescriptor) == 16, "IDT::InterruptDescriptor has wrong size"); +static_assert(sizeof(Register) == 10, "IDT::Register has wrong size"); +static_assert(alignof(decltype(idt)) % 8 == 0, "IDT must be 8 byte aligned!"); + +void load() { + // Create structure required for writing to idtr and load via lidt + Register idtr(Core::Interrupt::VECTORS - 1); + asm volatile("lidt %0\n\t" :: "m"(idtr) ); +} + +void handle(uint8_t vector, void * handler, enum Gate type, enum GateSize size, enum DPL dpl, bool present) { + struct InterruptDescriptor &item = idt[vector]; + item.selector = GDT::SEGMENT_KERNEL_CODE * sizeof(GDT::SegmentDescriptor); + item.type = type; + item.size = size; + item.dpl = dpl; + item.present = present ? 1 : 0; + + uintptr_t address = reinterpret_cast(handler); + item.address_low = address & 0xffff; + item.address_high = (address >> 16) & 0xffffffffffff; +} + +} // namespace IDT diff --git a/machine/idt.h b/machine/idt.h new file mode 100644 index 0000000..132391b --- /dev/null +++ b/machine/idt.h @@ -0,0 +1,60 @@ +/*! \file + * \brief \ref IDT "Interrupt Descriptor Table (IDT)" containing the entry points for interrupt handling. + */ + +#pragma once + +#include "types.h" + +/*! \brief "Interrupt Descriptor Table (IDT) + * \ingroup interrupt + * + * \see [ISDMv3 6.14 Exception and Interrupt Handling in 64-bit Mode](intel_manual_vol3.pdf#page=200) + */ + +namespace IDT { + /*! \brief Gate types + * + * \see [ISDMv3 3.5 System Descriptor Types](intel_manual_vol3.pdf#page=99) + */ + enum Gate { + GATE_TASK = 0x5, ///< Task Gate + GATE_INT = 0x6, ///< Interrupt Gate + GATE_TRAP = 0x7, ///< Trap Gate + }; + + /*! 
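\brief Example: registering an interrupt handler (sketch)
 *
 * A typical use of the functions declared below. The entry symbol is a placeholder and not part
 * of this patch; the low-level entry code usually lives in assembly and must save the registers:
 * \code
 *   extern "C" void timer_entry();                           // hypothetical low-level entry point
 *   IDT::handle(32, reinterpret_cast<void*>(&timer_entry));  // vector 32, defaults: interrupt gate, ring 0
 *   IDT::load();                                             // afterwards activate the table via `lidt`
 * \endcode
 */

/*! 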
\brief Gate size (address width) + * + * \see [ISDMv3 3.5 System Descriptor Types](intel_manual_vol3.pdf#page=99) + */ + enum GateSize { + GATE_SIZE_16 = 0, ///< 16 bit + GATE_SIZE_32 = 1, ///< 32 / 64 bit + }; + + /*! \brief Descriptor Privilege Level + */ + enum DPL { + DPL_KERNEL = 0, ///< Ring 0 / Kernel mode + /* DPLs 1 and 2 are unused */ + DPL_USER = 3, ///< Ring 3 / User mode + }; + + /*! \brief Load the IDT's address and size into the IDT register via `lidt`. + */ + void load(); + + /*! \brief Configure entry point for interrupt handling + * + * The provided entry function ("handler") is required to save the registers as a first step. + * + * \param vector Interrupt vector number for which the handler is to be set/changed + * \param handler Low-level entry point for interrupt handling + * \param type Gate type (Interrupt, Trap, or Task) + * \param size 16- or 32-bit + * \param dpl Permissions required to enter this interrupt handler (kernel- or user space) + * \param present Denotes whether the IDT descriptor is marked as available + */ + void handle(uint8_t vector, void * handler, enum Gate type = Gate::GATE_INT, + enum GateSize size = GateSize::GATE_SIZE_32, enum DPL dpl = DPL::DPL_KERNEL, bool present = true); +} // namespace IDT diff --git a/machine/ioport.h b/machine/ioport.h new file mode 100644 index 0000000..5a957b1 --- /dev/null +++ b/machine/ioport.h @@ -0,0 +1,83 @@ +/*! \file + * \brief \ref IOPort provides access to the x86 IO address space + */ + +#pragma once + +#include "types.h" + +/*! \brief Abstracts access to the I/O address space + * + * x86 PCs have a separate I/O address space that is accessible only via the machine instructions `in` and `out`. + * An IOPort object encapsulates the corresponding address in the I/O address space and can be used for byte- or + * word-wise reading or writing. + */ + +class IOPort { + /*! \brief Address in I/O address space + * + */ + uint16_t address; + + public: + /*! \brief Constructor + * \param addr Address from the I/O address space + */ + explicit IOPort(uint16_t addr) : address(addr) {} + + /*! \brief Write one byte to the I/O port + * \param val The value to be written + */ + void outb(uint8_t val) const { + asm volatile( + "out %%al, %%dx\n\t" + : + : "a"(val), "d"(address) + : + ); + } + + /*! \brief Write one word (2 bytes) to the I/O port + * \param val The value to be written + */ + void outw(uint16_t val) const { + asm volatile( + "out %%ax, %%dx\n\t" + : + :"a"(val), "d"(address) + : + ); + } + + /*! \brief Read one byte from the I/O port + * \return Read byte + */ + uint8_t inb() const { + uint8_t out = 0; + + asm volatile( + "in %%dx, %%al\n\t" + :"=a"(out) + :"d"(address) + : + ); + + return out; + } + + /*! \brief Read one word (2 bytes) from the I/O port + * \return Read word (2 bytes) + */ + uint16_t inw() const { + uint16_t out = 0; + + asm volatile( + "inw %%dx, %%ax\n\t" + :"=a"(out) + :"d"(address) + : + ); + + return out; + } +}; diff --git a/machine/keydecoder.cc b/machine/keydecoder.cc new file mode 100644 index 0000000..c093f07 --- /dev/null +++ b/machine/keydecoder.cc @@ -0,0 +1,118 @@ +#include "machine/keydecoder.h" +#include "machine/ps2controller.h" + +// Constants used for key decoding +const unsigned char BREAK_BIT = 0x80; +const unsigned char PREFIX_1 = 0xe0; +const unsigned char PREFIX_2 = 0xe1; + +Key KeyDecoder::decode(unsigned char code) { + Key key = modifier; + + // All keys that are introduced by the MF II keyboard (compared to the older AT keyboard) + // always send a prefix value as first byte.
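	// Illustrative example (scan code set 1, as used here): the right Ctrl key arrives as
	// 0xE0 0x1D (make) and 0xE0 0x9D (break), whereas the left Ctrl key sends plain 0x1D / 0x9D --
	// the prefix byte is what allows the two keys to be told apart further below.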
+ if (code == PREFIX_1 || code == PREFIX_2) { + prefix = code; + } else { + // Releasing a key is, for us, only important for the modifier keys such as SHIFT, CTRL and ALT, + // For other, non-modifier keys, we ignore the break code. + bool pressed = (code & BREAK_BIT) == 0; + + // A key's break code is identical to its make code with an additionally set BREAK_BIT + Key::Scancode scancode = static_cast(code & (~BREAK_BIT)); + + // We ignore "new" special keys, such as the Windows key + if (scancode < Key::Scancode::KEYS) { + // save state + status[scancode] = pressed; + + // Take a closer look at modifier make and break events + bool isModifier = true; + switch (scancode) { + // both shifts are handled equally + case Key::Scancode::KEY_LEFT_SHIFT: + case Key::Scancode::KEY_RIGHT_SHIFT: + modifier.shift = pressed; + break; + + case Key::Scancode::KEY_LEFT_ALT: + if (prefix == PREFIX_1) { + modifier.alt_right = pressed; + } else { + modifier.alt_left = pressed; + } + break; + + case Key::Scancode::KEY_LEFT_CTRL: + if (prefix == PREFIX_1) { + modifier.ctrl_right = pressed; + } else { + modifier.ctrl_left = pressed; + } + break; + + default: + isModifier = false; + } + + // For keys other than modifiers, we only care about the make code + if (pressed && !isModifier) { + switch (scancode) { + case Key::Scancode::KEY_CAPS_LOCK: + modifier.caps_lock ^= 1; + setLed(PS2Controller::LED_CAPS_LOCK, modifier.caps_lock); + break; + + case Key::Scancode::KEY_SCROLL_LOCK: + modifier.scroll_lock ^= 1; + setLed(PS2Controller::LED_SCROLL_LOCK, modifier.scroll_lock); + break; + + case Key::Scancode::KEY_NUM_LOCK: // Can be both NumLock and pause + // On old keyboards, the pause functionality was only accessible by + // pressing Ctrl+NumLock. Modern MF-II keyboards therefore send exactly + // this code combination when the pause key was pressed. + // Normally, the pause key does not provide an ASCII code, but we check + // that anyway. In either case, we're now done decoding. + if (modifier.ctrl_left) { // pause key + key.scancode = scancode; + } else { // NumLock + modifier.num_lock ^= 1; + setLed(PS2Controller::LED_NUM_LOCK, modifier.num_lock); + } + break; + + // Special case scan code 53: This code is used by both the minus key on the main + // keyboard and the division key on the number block. + // When the division key was pressed, we adjust the scancode accordingly. + case Key::Scancode::KEY_SLASH: + if (prefix == PREFIX_1) { + key.scancode = Key::Scancode::KEY_DIV; + key.shift = true; + } else { + key.scancode = scancode; + } + break; + + default: + key.scancode = scancode; + + // When NumLock is enabled and a key on the keypad was pressed, we + // want return the ASCII and scan codes of the corresponding numerical + // key instead of the arrow keys. + // The keys on the cursor block (prefix == PREFIX_1), however, should + // remain usable. Therefore, as a little hack, we deactivate the NumLock + // for these keys. + if (modifier.num_lock && prefix == PREFIX_1) { + key.num_lock = false; + } + } + } + } + + // The prefix is only valid for the immediately following code, which was just handled. + prefix = 0; + } + + return key; +} diff --git a/machine/keydecoder.h b/machine/keydecoder.h new file mode 100644 index 0000000..fc3dc82 --- /dev/null +++ b/machine/keydecoder.h @@ -0,0 +1,35 @@ +/*! \file + * \brief \ref KeyDecoder decodes a keystroke to the corresponding \ref Key object + */ + +#pragma once + +#include "object/key.h" + +/*! 
\brief Decoder for \ref ps2keyboardset1 "keyboard codes" received from the \ref PS2Controller + * \ingroup io + * + * Extracts the make and break codes, modifier and scan codes from the pressed key. + */ +class KeyDecoder { + unsigned char prefix; ///< Prefix byte for keys + Key modifier; ///< activated modifier keys (e.g., caps lock) + + public: + /*! \brief Current state (pressed or released) of all keys. + */ + bool status[Key::Scancode::KEYS]; + + /*! \brief Default constructor + */ + KeyDecoder() {} + + /*! \brief Interprets the \ref ps2keyboardset1 "make and break codes" received from the + * keyboard and derives the corresponding scan code and further information about + * other pressed keys, such as \key{shift} and \key{ctrl}. + * + * \param code Byte from Keyboard to decode + * \return Pressed key (\ref Key::valid returns `false` if the key is not yet complete) + */ + Key decode(unsigned char code); +}; diff --git a/machine/lapic.cc b/machine/lapic.cc new file mode 100644 index 0000000..e22ecec --- /dev/null +++ b/machine/lapic.cc @@ -0,0 +1,191 @@ +#include "machine/lapic.h" +#include "machine/lapic_registers.h" + +namespace LAPIC { + +/*! \brief Base Address + * used with offset to access memory mapped registers + */ +volatile uintptr_t base_address = 0xfee00000; + +Register read(Index idx) { + return *reinterpret_cast(base_address + idx); +} + +void write(Index idx, Register value) { + *reinterpret_cast(base_address + idx) = value; +} + +/*! \brief Local APIC ID (for Pentium 4 and newer) + * + * Is assigned automatically during boot and should not be changed. + * + * \see [ISDMv3, 10.4.6 Local APIC ID](intel_manual_vol3.pdf#page=371) + */ +union IdentificationRegister { + struct { + uint32_t : 24, ///< (reserved) + apic_id : 8; ///< APIC ID + }; + Register value; + + IdentificationRegister() : value(read(Index::IDENTIFICATION)) {} +} __attribute__((packed)); + +/*! \brief Local APIC Version + * + * \see [ISDMv3 10.4.8 Local APIC Version Register](intel_manual_vol3.pdf#page=373) + */ +union VersionRegister { + struct { + uint32_t version : 8, ///< 0x14 for P4 and Xeon, 0x15 for more recent hardware + : 8, ///< (reserved) + max_lvt_entry : 8, ///< Maximum number of local vector entries + suppress_eoi_broadcast : 1, ///< Support for suppressing EOI broadcasts + : 7; ///< (reserved) + }; + Register value; + + VersionRegister() : value(read(Index::VERSION)) {} +} __attribute__((packed)); + +/*! \brief Logical Destination Register + * \see [ISDMv3 10.6.2.2 Logical Destination Mode](intel_manual_vol3.pdf#page=385) + */ +union LogicalDestinationRegister { + struct { + uint32_t : 24, ///< (reserved) + lapic_id : 8; ///< Logical APIC ID + }; + Register value; + + LogicalDestinationRegister() : value(read(Index::LOGICAL_DESTINATION)) {} + ~LogicalDestinationRegister() { + write(Index::LOGICAL_DESTINATION, value); + } +} __attribute__((packed)); + +enum Model { + CLUSTER = 0x0, + FLAT = 0xf +}; + +/*! \brief Destination Format Register + * + * \see [ISDMv3 10.6.2.2 Logical Destination Mode](intel_manual_vol3.pdf#page=385) + */ +union DestinationFormatRegister { + struct { + uint32_t : 28; ///< (reserved) + Model model : 4; ///< Model (Flat vs. Cluster) + }; + Register value; + DestinationFormatRegister() : value(read(Index::DESTINATION_FORMAT)) {} + ~DestinationFormatRegister() { + write(Index::DESTINATION_FORMAT, value); + } +} __attribute__((packed)); + +/*! 
\brief Task Priority Register + * + * \see [ISDMv3 10.8.3.1 Task and Processor Priorities](intel_manual_vol3.pdf#page=391) + */ +union TaskPriorityRegister { + struct { + uint32_t task_prio_sub : 4, ///< Task Priority Sub-Class + task_prio : 4, ///< Task Priority + : 24; ///< (reserved) + }; + Register value; + TaskPriorityRegister() : value(read(Index::TASK_PRIORITY)) {} + ~TaskPriorityRegister() { + write(Index::TASK_PRIORITY, value); + } +} __attribute__((packed)); + +/*! \brief APIC Software Status for Spurious Interrupt Vector */ +enum APICSoftware { + APIC_DISABLED = 0, + APIC_ENABLED = 1, +}; + +/*! \brief Focus Processor Checking for Spurious Interrupt Vector */ +enum FocusProcessorChecking { + CHECKING_ENABLED = 0, + CHECKING_DISABLED = 1, +}; + +/*! \brief Suppress End-Of-Interrupt-Broadcast for Spurious Interrupt Vector */ +enum SuppressEOIBroadcast { + BROADCAST = 0, + SUPPRESS_BROADCAST = 1, +}; + +/*! \brief Spurious Interrupt Vector Register + * + * \see [ISDMv3 10.9 Spurious Interrupt](intel_manual_vol3.pdf#page=394) + */ +union SpuriousInterruptVectorRegister { + struct { + uint32_t spurious_vector : 8; ///< Spurious Vector + APICSoftware apic_software : 1; ///< APIC Software Enable/Disable + FocusProcessorChecking focus_processor_checking : 1; ///< Focus Processor Checking + uint32_t reserved_1 : 2; + SuppressEOIBroadcast eoi_broadcast_suppression : 1; + uint32_t reserved:19; + }; + Register value; + + SpuriousInterruptVectorRegister() : value(read(Index::SPURIOUS_INTERRUPT_VECTOR)) {} + ~SpuriousInterruptVectorRegister() { + write(Index::SPURIOUS_INTERRUPT_VECTOR, value); + } +} __attribute__((packed)); +static_assert(sizeof(SpuriousInterruptVectorRegister) == 4, "LAPIC Spurious Interrupt Vector has wrong size"); + +uint8_t getID() { + IdentificationRegister ir; + return ir.apic_id; +} + +uint8_t getLogicalID() { + LogicalDestinationRegister ldr; + return ldr.lapic_id; +} + +uint8_t getVersion() { + VersionRegister vr; + return vr.version; +} + +void init(uint8_t logical_id) { + // reset logical destination ID + // can be set using setLogicalLAPICID() + LogicalDestinationRegister ldr; + ldr.lapic_id = logical_id; + + // set task priority to 0 -> accept all interrupts + TaskPriorityRegister tpr; + tpr.task_prio = 0; + tpr.task_prio_sub = 0; + + // set flat delivery mode + DestinationFormatRegister dfr; + dfr.model = Model::FLAT; + + // use 255 as spurious vector, enable APIC and disable focus processor + SpuriousInterruptVectorRegister sivr; + sivr.spurious_vector = 0xff; + sivr.apic_software = APICSoftware::APIC_ENABLED; + sivr.focus_processor_checking = FocusProcessorChecking::CHECKING_DISABLED; +} + +void endOfInterrupt() { + // dummy read + read(SPURIOUS_INTERRUPT_VECTOR); + + // signal end of interrupt + write(EOI, 0); +} + +} // namespace LAPIC diff --git a/machine/lapic.h b/machine/lapic.h new file mode 100644 index 0000000..b39c77a --- /dev/null +++ b/machine/lapic.h @@ -0,0 +1,142 @@ +/*! \file + * \brief \ref LAPIC abstracts access to the Local \ref APIC + */ + +#pragma once + +#include "types.h" + +/*! \brief Abstracts the local APIC (which is integrated into every CPU core) + * \ingroup interrupts + * + * In modern (x86) PCs, every CPU core has its own Local APIC (LAPIC). The LAPIC is the link between the + * local CPU core and the I/O APIC (that takes care about external interrupt sources. + * Interrupt messages received by the LAPIC will be passed to the corresponding CPU core and trigger the + * interrupt handler on this core. 
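 *
 * A minimal usage sketch of the interface declared below (the logical ID value is just an
 * example, it is not prescribed by this header):
 * \code
 *   LAPIC::init(0x1);          // during boot on this core: set logical LAPIC ID, accept all interrupts
 *   // ... at the end of every interrupt handler, right before returning:
 *   LAPIC::endOfInterrupt();
 * \endcode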
+ * + * \see [ISDMv3 10.4 Local APIC](intel_manual_vol3.pdf#page=366) + */ +namespace LAPIC { + /*! \brief Initializes the local APIC of the calling CPU core and sets the logical LAPIC ID in the LDR register + * \param logical_id APIC ID to be set + */ + void init(uint8_t logical_id); + + /*! \brief Signal EOI (End Of Interrupt) + * + * Signals to the LAPIC that the handling of the current interrupt has finished. This function must be called at + * the end of interrupt handling, before returning with `iret`. + */ + void endOfInterrupt(); + + /*! \brief Get the ID of the current core's LAPIC + * \return LAPIC ID + */ + uint8_t getID(); + + /*! \brief Get the Logical ID of the current core's LAPIC + * \return Logical ID + */ + uint8_t getLogicalID(); + + /*! \brief Set the Logical ID of the current core's LAPIC + * \param id new Logical ID + */ + void setLogicalID(uint8_t id); + + /*! \brief Get version number of local APIC + * \return version number + */ + uint8_t getVersion(); + +/*! \brief Inter-Processor Interrupts + * + * For multi-core systems, the LAPIC enables sending messages (Inter-Processor Interrupts, IPIs) to + * other CPU cores and receiving those sent from other cores. + * + * \see [ISDMv3 10.6 Issuing Interprocessor Interrupts](intel_manual_vol3.pdf#page=380) + */ +namespace IPI { + + /*! \brief Check if the previously sent IPI has reached its destination. + * + * \return `true` if the previous IPI was accepted by its target processor, otherwise `false` + */ + bool isDelivered(); + + /*! \brief Send an Inter-Processor Interrupt (IPI) + * \param destination ID of the target processor (use APIC::getLAPICID(core) ) + * \param vector Interrupt vector number to be triggered + */ + void send(uint8_t destination, uint8_t vector); + + /*! \brief Send an Inter-Processor Interrupt (IPI) to a group of processors + * \param logical_destination Mask containing the logical APIC IDs of the target processors (use APIC::getLogicalLAPICID()) + * \param vector Interrupt vector number to be triggered + */ + void sendGroup(uint8_t logical_destination, uint8_t vector); + + /*! \brief Send an Inter-Processor Interrupt (IPI) to all processors (including self) + * \param vector Interrupt vector number to be triggered + */ + void sendAll(uint8_t vector); + + /*! \brief Send an Inter-Processor Interrupt (IPI) to all other processors (all but self) + * \param vector Interrupt vector number to be triggered + */ + void sendOthers(uint8_t vector); + + /*! \brief Send an INIT request IPI to all other processors + * + * \note Only required for startup + * + * \param assert if `true` send an INIT, + * on `false` send an INIT Level De-assert + */ + void sendInit(bool assert = true); + + /*! \brief Send a Startup IPI to all other processors + * + * \note Only required for startup + * + * \param vector Pointer to a startup routine + */ + void sendStartup(uint8_t vector); + +} // namespace IPI + +/*! \brief Local Timer (for each LAPIC / CPU) + * + * \see [ISDMv3 10.5.4 APIC Timer](intel_manual_vol3.pdf#page=378) + */ +namespace Timer { + + /*! \brief Determines the LAPIC timer frequency. + * + * This function will calculate the number of LAPIC-timer ticks passing in the course of one millisecond. + * To do so, this function will rely on PIT timer functionality and measure the tick delta between start + * and end of waiting for a predefined period. + * + * For measurement, the LAPIC-timer single-shot mode (without interrupts) is used; after measurement, the + * timer is disabled again.
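 *
 * Only as a sketch of the idea (register indices as in machine/lapic_registers.h, PIT helpers as
 * in machine/pit.h; the concrete calculation is left to the assignment):
 * \code
 *   // program the divider and a large initial counter (single-shot, interrupt masked)
 *   LAPIC::write(TIMER_INITIAL_COUNTER, 0xffffffff);
 *   PIT::delay(10000);                                       // busy-wait 10 ms using the PIT
 *   uint32_t elapsed = 0xffffffff - LAPIC::read(TIMER_CURRENT_COUNTER);
 *   uint32_t ticks_per_ms = elapsed / 10;
 * \endcode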
+ * + * \note The timer is counting towards zero. + * + * \return Number of LAPIC-timer ticks per millisecond + */ + uint32_t ticks(void); + + /*! \brief Set the LAPIC timer. + * \param counter Initial counter value; decremented on every LAPIC timer tick + * \param divide Divider (power of 2, i.e., 1 2 4 8 16 32...) used as prescaler between bus frequency + * and LAPIC timer frequency: `LAPIC timer frequency = divide * bus frequency`. + * `divide` is a numerical parameter, the conversion to the corresponding bit mask is + * done internally by calling getClockDiv(). + * \param vector Interrupt vector number to be triggered on counter expiry + * \param periodic If set, the interrupt will be issued periodically + * \param masked If set, interrupts on counter expiry are suppressed + */ + void set(uint32_t counter, uint8_t divide, uint8_t vector, bool periodic, bool masked = false); + +} // namespace Timer +} // namespace LAPIC diff --git a/machine/lapic_ipi.cc b/machine/lapic_ipi.cc new file mode 100644 index 0000000..eae9216 --- /dev/null +++ b/machine/lapic_ipi.cc @@ -0,0 +1,228 @@ +#include "machine/lapic_registers.h" + +namespace LAPIC { +namespace IPI { + +/*! \brief Delivery mode specifies the type of interrupt sent to the CPU. */ +enum DeliveryMode { + FIXED = 0, ///< "ordinary" interrupt; send to ALL cores listed in the destination bit mask + LOWEST_PRIORITY = 1, ///< "ordinary" interrupt; send to the lowest priority core from destination mask + SMI = 2, ///< System Management Interrupt; vector number required to be 0 + // Reserved + NMI = 4, ///< Non-Maskable Interrupt, vector number ignored, only edge triggered + INIT = 5, ///< Initialization interrupt (always treated as edge triggered) + INIT_LEVEL_DEASSERT = 5, ///< Synchronization interrupt + STARTUP = 6, ///< Dedicated Startup-Interrupt (SIPI) + // Reserved +}; + +/*! \brief Way of interpreting the value written to the destination field. */ +enum DestinationMode { + PHYSICAL = 0, ///< Destination contains the physical destination APIC ID + LOGICAL = 1 ///< Destination contains a mask of logical APIC IDs +}; + +/*! \brief Interrupt state */ +enum DeliveryStatus { + IDLE = 0, ///< No activity for this interrupt + SEND_PENDING = 1 ///< Interrupt will be sent as soon as the bus / LAPIC is ready +}; + +/*! \brief Interrupt level */ +enum Level { + DEASSERT = 0, ///< Must be zero when DeliveryMode::INIT_LEVEL_DEASSERT + ASSERT = 1 ///< Must be one for all other delivery modes +}; + +/*! \brief Trigger mode for DeliveryMode::INIT_LEVEL_DEASSERT */ +enum TriggerMode { + EDGE_TRIGGERED = 0, ///< edge triggered + LEVEL_TRIGGERED = 1 ///< level triggered +}; + +/*! \brief Shorthand for commonly used destinations */ +enum DestinationShorthand { + NO_SHORTHAND = 0, ///< Use destination field instead of shorthand + SELF = 1, ///< Send IPI to self + ALL_INCLUDING_SELF = 2, ///< Send IPI to all including self + ALL_EXCLUDING_SELF = 3 ///< Send IPI to all except self +}; + +/*! \brief Interrupt mask */ +enum InterruptMask { + UNMASKED = 0, ///< Interrupt entry is active (non-masked) + MASKED = 1 ///< Interrupt entry is deactivated (masked) +}; + +/*! \brief Interrupt Command + * + * \see [ISDMv3 10.6.1 Interrupt Command Register (ICR)](intel_manual_vol3.pdf#page=381) + */ +union InterruptCommand { + struct { + /*! \brief Interrupt vector in the \ref IDT "Interrupt Descriptor Table (IDT)" will be + * activated when the corresponding external interrupt triggers. + *//*! 
\brief Interrupt vector in the \ref IDT "Interrupt Descriptor Table (IDT)" will be + * activated when the corresponding external interrupt triggers. + */ + uint64_t vector : 8; + + /*! \brief The delivery mode denotes the way the interrupts will be delivered to the local CPU + * cores, respectively to their local APICs. + * + * For StuBS, we use `DeliveryMode::LowestPriority`, as all CPU cores have the same + * priority and we want to distribute interrupts evenly among them. + * It, however, is not guaranteed that this method of load balancing will work on every system. + */ + enum DeliveryMode delivery_mode : 3; + + /*! \brief The destination mode defines how the value stored in `destination` will be + * interpreted. + * + * For StuBS, we use `DestinationMode::Logical`. + */ + enum DestinationMode destination_mode : 1; + + /*! \brief Delivery status holds the current status of interrupt delivery. + * + * \note This field is read only; write accesses to this field will be ignored. + */ + enum DeliveryStatus delivery_status : 1; + + uint64_t : 1; ///< reserved + + /*! \brief The polarity denotes when an interrupt should be issued. + * + * For StuBS, we use `Polarity::High` (i.e., when the interrupt line is, logically, 1). + */ + enum Level level : 1; + + /*! \brief The trigger mode states whether the interrupt signaling is level or edge triggered. + * + * StuBS uses `TriggerMode::Edge` for Keyboard and Timer, the (optional) serial interface, + * however, needs `TriggerMode::Level`. + */ + enum TriggerMode trigger_mode : 1; + + uint64_t : 2; ///< reserved + + enum DestinationShorthand destination_shorthand : 2; + + uint64_t : 36; ///< Reserved, do not modify + + /*! \brief Interrupt destination. + * + * The meaning of destination depends on the destination mode: + * For the logical destination mode, destination holds a bit mask made up of the cores that + * are candidates for receiving the interrupt. + * In the single-core case, this value is `1`, in the multi-core case, the `n` low-order bits + * needs to be set (with `n` being the number of CPU cores, see \ref Core::count() ). + * Setting the `n` low-order bits marks all available cores as candidates for receiving + * interrupts and thereby balancing the number of interrupts between the cores. + * + * \note This form of load balancing depends on the hardware's behavior and may not work on all + * systems in the same fashion. Most notably, in QEMU all interrupts are sent to the BSP + * (core 0). + */ + uint64_t destination : 8; + } __attribute__((packed)); + + /*! \brief I/O redirection-table entry + * + * Every entry in the redirection table represents an external source of interrupts and has a size + * of 64 bits. Due to the I/O APIC registers being only 32 bits wide, the 64-bit value is split in two + * 32 bit values. + */ + struct { + Register value_low; ///< First, low-order register + Register value_high; ///< Second, high-order register + } __attribute__((packed)); + + /*! 
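\brief Example: logical destination masks (illustrative)
 *
 * Assume four cores whose logical LAPIC IDs were set to one bit each during LAPIC::init()
 * (core 0 -> 0b0001, core 1 -> 0b0010, ...). In logical destination mode a `destination` value of
 * `0b0011` then addresses cores 0 and 1, and `0b1111` addresses all four -- this is the mask that
 * sendGroup() passes through.
 */

/*! 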
\brief Default constructor */ + InterruptCommand() = default; + + explicit InterruptCommand(uint8_t destination, uint8_t vector = 0, + DestinationMode destination_mode = DestinationMode::PHYSICAL, + DeliveryMode delivery_mode = DeliveryMode::FIXED, + TriggerMode trigger_mode = TriggerMode::EDGE_TRIGGERED, + Level level = Level::ASSERT) { + readRegister(); + this->vector = vector; + this->delivery_mode = delivery_mode; + this->destination_mode = destination_mode; + this->level = level; + this->trigger_mode = trigger_mode; + this->destination_shorthand = DestinationShorthand::NO_SHORTHAND; + this->destination = destination; + } + + InterruptCommand(DestinationShorthand destination_shorthand, uint8_t vector, + DeliveryMode delivery_mode = DeliveryMode::FIXED, + TriggerMode trigger_mode = TriggerMode::EDGE_TRIGGERED, + Level level = Level::ASSERT) { + readRegister(); + this->vector = vector; + this->delivery_mode = delivery_mode; + this->level = level; + this->trigger_mode = trigger_mode; + this->destination_shorthand = destination_shorthand; + this->destination = destination; + } + + void send() const { + write(INTERRUPT_COMMAND_REGISTER_HIGH, value_high); + write(INTERRUPT_COMMAND_REGISTER_LOW, value_low); + } + + bool isSendPending() { + value_low = read(INTERRUPT_COMMAND_REGISTER_LOW); + return delivery_status == DeliveryStatus::SEND_PENDING; + } + + private: + void readRegister() { + while (isSendPending()) {} + value_high = read(INTERRUPT_COMMAND_REGISTER_HIGH); + } +}; +static_assert(sizeof(InterruptCommand) == 8, "LAPIC Interrupt Command has wrong size"); + +bool isDelivered() { + InterruptCommand ic; + return !ic.isSendPending(); +} + +void send(uint8_t destination, uint8_t vector) { + InterruptCommand ic(destination, vector); + ic.send(); +} + +void sendGroup(uint8_t logical_destination, uint8_t vector) { + InterruptCommand ic(logical_destination, vector, DestinationMode::LOGICAL); + ic.send(); +} + +void sendAll(uint8_t vector) { + InterruptCommand ic(DestinationShorthand::ALL_INCLUDING_SELF, vector); + ic.send(); +} + +void sendOthers(uint8_t vector) { + InterruptCommand ic(DestinationShorthand::ALL_EXCLUDING_SELF, vector); + ic.send(); +} + +void sendInit(bool assert) { + LAPIC::IPI::InterruptCommand ic(DestinationShorthand::ALL_EXCLUDING_SELF, 0, DeliveryMode::INIT, + assert ? TriggerMode::EDGE_TRIGGERED : TriggerMode::LEVEL_TRIGGERED, + assert ? Level::ASSERT : Level::DEASSERT); + ic.send(); +} + +void sendStartup(uint8_t vector) { + InterruptCommand ic(DestinationShorthand::ALL_EXCLUDING_SELF, vector, DeliveryMode::STARTUP); + ic.send(); +} + +} // namespace IPI +} // namespace LAPIC diff --git a/machine/lapic_registers.h b/machine/lapic_registers.h new file mode 100644 index 0000000..f4fc317 --- /dev/null +++ b/machine/lapic_registers.h @@ -0,0 +1,48 @@ +/*! \file + * \brief Structures and macros for accessing \ref LAPIC "the local APIC". + */ + +#pragma once + +#include "types.h" + +namespace LAPIC { + // Memory Mapped Base Address + extern volatile uintptr_t base_address; + + typedef uint32_t Register; + + /*! \brief Register Offset Index + * + * \see [ISDMv3 10.4.1 The Local APIC Block Diagram](intel_manual_vol3.pdf#page=368) + */ + enum Index : uint16_t { + IDENTIFICATION = 0x020, ///< Local APIC ID Register, RO (sometimes R/W). Do not change! 
+ VERSION = 0x030, ///< Local APIC Version Register, RO + TASK_PRIORITY = 0x080, ///< Task Priority Register, R/W + EOI = 0x0b0, ///< EOI Register, WO + LOGICAL_DESTINATION = 0x0d0, ///< Logical Destination Register, R/W + DESTINATION_FORMAT = 0x0e0, ///< Destination Format Register, bits 0-27 RO, bits 28-31 R/W + SPURIOUS_INTERRUPT_VECTOR = 0x0f0, ///< Spurious Interrupt Vector Register, bits 0-8 R/W, bits 9-1 R/W + INTERRUPT_COMMAND_REGISTER_LOW = 0x300, ///< Interrupt Command Register 1, R/W + INTERRUPT_COMMAND_REGISTER_HIGH = 0x310, ///< Interrupt Command Register 2, R/W + TIMER_CONTROL = 0x320, ///< LAPIC timer control register, R/W + TIMER_INITIAL_COUNTER = 0x380, ///< LAPIC timer initial counter register, R/W + TIMER_CURRENT_COUNTER = 0x390, ///< LAPIC timer current counter register, RO + TIMER_DIVIDE_CONFIGURATION = 0x3e0 ///< LAPIC timer divide configuration register, RW + }; + + /*! \brief Get value from APIC register + * + * \param idx Register Offset Index + * \return current value of register + */ + Register read(Index idx); + + /*! \brief Write value to APIC register + * + * \param idx Register Offset Index + * \param value value to be written into register + */ + void write(Index idx, Register value); +} // namespace LAPIC diff --git a/machine/lapic_timer.cc b/machine/lapic_timer.cc new file mode 100644 index 0000000..02dbd1d --- /dev/null +++ b/machine/lapic_timer.cc @@ -0,0 +1,97 @@ +#include "types.h" +#include "machine/lapic.h" +#include "machine/lapic_registers.h" +#include "machine/core.h" + +namespace LAPIC { +namespace Timer { + +/*! \brief Timer Delivery Status */ +enum DeliveryStatus { + IDLE = 0, + SEND_PENDING = 1 +}; + +/*! \brief Timer Mode */ +enum TimerMode { + ONE_SHOT = 0, + PERIODIC = 1, + DEADLINE = 2 + // reserved +}; + +/*! \brief Timer Mask */ +enum Mask { + NOT_MASKED = 0, + MASKED = 1 +}; + +static const Register INVALID_DIV = 0xff; + +/*! \brief LAPIC-Timer Control Register + * + * \see [ISDMv3 10.5.1 Local Vector Table](intel_manual_vol3.pdf#page=375) + */ +union ControlRegister { + struct { + uint32_t vector : 8; ///< Vector + uint32_t : 4; + DeliveryStatus delivery_status : 1; ///< Delivery Status + uint32_t : 3; + Mask masked : 1; ///< Interrupt Mask (if set, interrupt will not trigger) + TimerMode timer_mode : 2; ///< Timer Mode + uint32_t : 13; + }; + Register value; +} __attribute__((packed)); + +/*! \brief LAPIC timer divider table + * + * \see [ISDMv3 10.5.4 APIC Timer](intel_manual_vol3.pdf#page=378) + */ +static const Register div_masks[] = { + 0xb, ///< divides by 1 + 0x0, ///< divides by 2 + 0x1, ///< divides by 4 + 0x2, ///< divides by 8 + 0x3, ///< divides by 16 + 0x8, ///< divides by 32 + 0x9, ///< divides by 64 + 0xa ///< divides by 128 +}; + +/*! \brief Calculate the bit mask for the LAPIC-timer divider. + * \param div Divider, must be power of two: 1, 2, 4, 8, 16, 32, 64, 128 + * \return Bit mask for LAPIC::Timer::set() or `0xff` if `div` is invalid. + */ +Register getClockDiv(uint8_t div) { + // div is zero or not a power of two? 
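	// (A power of two has exactly one bit set, so clearing the lowest set bit via `div & (div - 1)`
	//  yields 0 -- e.g. 8 & 7 == 0b1000 & 0b0111 == 0, whereas 6 & 5 == 0b0110 & 0b0101 != 0.)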
+ if (div == 0 || (div & (div - 1)) != 0) { + return INVALID_DIV; + } + + int trail = __builtin_ctz(div); // count trailing 0-bits + if (trail > 7) { + return INVALID_DIV; + } + + return div_masks[trail]; +} + +uint32_t ticks(void) { + uint32_t ticks = 0; // ticks per millisecond + // Calculation (Assignment 5) + + return ticks; +} + +void set(uint32_t counter, uint8_t divide, uint8_t vector, bool periodic, bool masked) { + (void) counter; + (void) divide; + (void) vector; + (void) periodic; + (void) masked; +} + +} // namespace Timer +} // namespace LAPIC diff --git a/machine/pic.cc b/machine/pic.cc new file mode 100644 index 0000000..aea066f --- /dev/null +++ b/machine/pic.cc @@ -0,0 +1,61 @@ +#include "machine/pic.h" +#include "machine/ioport.h" +#include "types.h" + +namespace PIC { + +void initialize() { + // Access primary & secondary PIC via two ports each + IOPort primary_port_a(0x20); + IOPort primary_port_b(0x21); + IOPort secondary_port_a(0xa0); + IOPort secondary_port_b(0xa1); + + // Initialization Command Word 1 (ICW1) + // Basic PIC configuration, starting initialization + enum InitializationCommandWord1 { + ICW4_NEEDED = 1 << 0, // use Initialization Command Word 4 + SINGLE_MODE = 1 << 1, // Single or multiple (cascade mode) 8259A + ADDRESS_INTERVAL_HALF = 1 << 2, // 4 or 8 bit interval between the interrupt vector locations + LEVEL_TRIGGERED = 1 << 3, // Level or edge triggered + ALWAYS_1 = 1 << 4, + }; + const uint8_t icw1 = InitializationCommandWord1::ICW4_NEEDED + | InitializationCommandWord1::ALWAYS_1; + // ICW1 in port A (each) + primary_port_a.outb(icw1); + secondary_port_a.outb(icw1); + + // Initialization Command Word 2 (ICW2): + // Configure interrupt vector base offset in port B + primary_port_b.outb(0x20); // Primary: IRQ Offset 32 + secondary_port_b.outb(0x28); // Secondary: IRQ Offset 40 + + // Initialization Command Word 3 (ICW3): + // Configure pin on primary PIC connected to secondary PIC + const uint8_t pin = 2; // Secondary connected on primary pin 2 + primary_port_b.outb(1 << pin); // Pin as bit mask for primary + secondary_port_b.outb(pin); // Pin as value (ID) for secondary + + // Initialization Command Word 4 (ICW4) + // Basic PIC configuration, starting initialization + enum InitializationCommandWord4 { + MODE_8086 = 1 << 0, // 8086/8088 or 8085 mode + AUTO_EOI = 1 << 1, // Single or multiple (cascade mode) 8259A + BUFFER_PRIMARY = 1 << 2, // Primary or secondary buffering + BUFFERED_MODE = 1 << 3, // Enable or disable buffering (for primary or secondary above) + SPECIAL_FULLY_NESTED = 1 << 4 // Special or non special fully nested + }; + const uint8_t icw4 = InitializationCommandWord4::MODE_8086 + | InitializationCommandWord4::AUTO_EOI; + // ICW3 in port B (each) + primary_port_b.outb(icw4); + secondary_port_b.outb(icw4); + + // Operation Control Word 1 (OCW1): + // Disable (mask) all hardware interrupts on both legacy PICs (we'll use APIC) + secondary_port_b.outb(0xff); + primary_port_b.outb(0xff); +} + +} // namespace PIC diff --git a/machine/pic.h b/machine/pic.h new file mode 100644 index 0000000..e27d537 --- /dev/null +++ b/machine/pic.h @@ -0,0 +1,17 @@ +/*! \file + * \brief Handle (disable) the old Programmable Interrupt Controller (PIC) + */ + +#pragma once + +/*! \brief The Programmable Interrupt Controller (PIC aka 8259A) + */ +namespace PIC { + +/*! 
\brief Initialize the PICs (Programmable Interrupt Controller, 8259A), + * such that all 15 hardware interrupts are stored sequentially in the \ref IDT + * and the hardware interrupts are disabled (in favor of \ref APIC). + */ +void initialize(); + +} // namespace PIC diff --git a/machine/pit.cc b/machine/pit.cc new file mode 100644 index 0000000..faa8839 --- /dev/null +++ b/machine/pit.cc @@ -0,0 +1,216 @@ +#include "machine/pit.h" +#include "machine/ioport.h" +#include "machine/core.h" + +namespace PIT { + +// we only use PIT channel 2 +const uint8_t CHANNEL = 2; +static IOPort data(0x40 + CHANNEL); + +/*! \brief Access mode + */ +enum AccessMode { + LATCH_COUNT_VALUE = 0, + LOW_BYTE_ONLY = 1, + HIGH_BYTE_ONLY = 2, + LOW_AND_HIGH_BYTE = 3 +}; + +/*! \brief Operating Mode + * + * \warning Channel 2 is not able to send interrupts, however, the status bit will be set + */ +enum OperatingMode { + INTERRUPT_ON_TERMINAL_COUNT = 0, + PROGRAMMABLE_ONE_SHOT = 1, + RATE_GENERATOR = 2, + SQUARE_WAVE_GENERATOR = 3, ///< useful for the PC speaker + SOFTWARE_TRIGGERED_STROBE = 4, + HARDWARE_TRIGGERED_STROBE = 5 +}; + +/*! \brief data format + */ +enum Format { + BINARY = 0, + BCD = 1 ///< Binary Coded Decimals +}; + +// Mode register (only writable) +static IOPort mode_register(0x43); +union Mode { + struct { + Format format : 1; + OperatingMode operating : 3; + AccessMode access : 2; + uint8_t channel : 2; + }; + uint8_t value; + + /*! \brief Constructor for mode, takes the numeric value */ + explicit Mode(uint8_t value) : value(value) {} + + /*! \brief Constructor for counting mode + * \param access Access mode to the 16-bit counter value + * \param operating Operating mode for the counter + * \param format Number format for the 16-bit counter values (binary or BCD) + */ + Mode(AccessMode access, OperatingMode operating, Format format) : + format(format), operating(operating), access(access), channel(PIT::CHANNEL) {} + + /*! \brief (Default) constructor for reading the counter value + */ + Mode() : value(0) { + this->channel = PIT::CHANNEL; + } + + /*! \brief Write the value to the mode register + */ + void write() const { + mode_register.outb(value); + } +}; + +// The NMI Status and Control Register contains details about PIT counter 2 +static IOPort controlRegister(0x61); +union Control { + /*! \brief I/O-port bitmap for the NMI Status and Control Register + * \note Over time, the meaning of the bits stored at I/O port 0x61 changed; don't get the structure confused + * with old documentation on the IBM PC XT platform. + * \see [Intel® I/O Controller Hub 7 (ICH7) Family](i-o-controller-hub-7-datasheet.pdf#page=415), page 415 + */ + struct { + //! If enabled, the interrupt state will be visible at status_timer_counter2 + uint8_t enable_timer_counter2 : 1; + uint8_t enable_speaker_data : 1; ///< If set, speaker output is equal to status_timer_counter2 + uint8_t enable_pci_serr : 1; ///< not important, do not modify + uint8_t enable_nmi_iochk : 1; ///< not important, do not modify + const uint8_t refresh_cycle_toggle : 1; ///< not important, must be 0 on write + const uint8_t status_timer_counter2 : 1; ///< will be set on timer expiration; must be 0 on write + const uint8_t status_iochk_nmi_source : 1; ///< not important, must be 0 on write + const uint8_t status_serr_nmi_source : 1; ///< not important, must be 0 on write + }; + uint8_t value; + + /*! \brief Constructor + * \param value Numeric value for the control register + */ + explicit Control(uint8_t value) : value(value) {} + + /*! 
\brief Default constructor + * Automatically reads the current contents from the control register. + */ + Control() : value(controlRegister.inb()) {} + + /*! \brief Write the current state to the control register. + */ + void write() const { + controlRegister.outb(value); + } +}; + +// The base frequency is, due to historic reasons, 1.193182 MHz. +const uint64_t BASE_FREQUENCY = 1193182ULL; + +bool set(uint16_t us) { + // Counter ticks for us + uint64_t counter = BASE_FREQUENCY * us / 1000000ULL; + + // As the hardware counter has a size of 16 bit, we want to check whether the + // calculated counter value is too large ( > 54.9ms ) + if (counter > 0xffff) { + return false; + } + + // Interrupt state should be readable in status register, but PC speaker should remain off + Control c; + c.enable_speaker_data = 0; + c.enable_timer_counter2 = 1; + c.write(); + + // Channel 2, 16-bit divisor, with mode 0 (interrupt) in binary format + Mode m(AccessMode::LOW_AND_HIGH_BYTE, OperatingMode::INTERRUPT_ON_TERMINAL_COUNT, Format::BINARY); + m.write(); + + // Set the counter's start value + data.outb(counter & 0xff); // low + data.outb((counter >> 8) & 0xff); // high + + return true; +} + +uint16_t get(void) { + // Set mode to reading + Mode m; + m.write(); + + // Read low and high + uint16_t value = data.inb(); + value |= data.inb() << 8; + + return value; +} + +bool isActive(void) { + Control c; // reads the current value from the control register + return c.enable_timer_counter2 == 1 && c.status_timer_counter2 == 0; +} + +bool waitForTimeout(void) { + while(true) { + Control c; // reads the current value from the control register + if (c.enable_timer_counter2 == 0) { + return false; + } else if (c.status_timer_counter2 == 1) { + return true; + } else { + Core::pause(); + } + } +} + +bool delay(uint16_t us) { + return set(us) && waitForTimeout(); +} + +void pcspeaker(uint32_t freq) { + Control c; + if (freq == 0) { + disable(); + } else { + // calculate frequency divider + uint64_t div = BASE_FREQUENCY / freq; + if (div > 0xffff) { + div = 0xffff; + } + + // check if already configured + if (c.enable_speaker_data == 0) { + // if not, set mode + Mode m(AccessMode::LOW_AND_HIGH_BYTE, OperatingMode::SQUARE_WAVE_GENERATOR, Format::BINARY); + m.write(); + } + + // write frequency divider + data.outb(div & 0xff); + data.outb((div >> 8) & 0xff); + + // already configured? (second part to prevent playing a wrong sound) + if (c.enable_speaker_data == 0) { + // activate PC speaker + c.enable_speaker_data = 1; + c.enable_timer_counter2 = 1; + c.write(); + } + } +} + +void disable(void) { + Control c; + c.enable_speaker_data = 0; + c.enable_timer_counter2 = 0; + c.write(); +} + +} // namespace PIT diff --git a/machine/pit.h b/machine/pit.h new file mode 100644 index 0000000..ca10b34 --- /dev/null +++ b/machine/pit.h @@ -0,0 +1,74 @@ +/*! \file + * \brief The old/historical \ref PIT "Programmable Interval Timer (PIT)" + */ + +#pragma once + +#include "types.h" + +/*! \brief Abstraction of the historical Programmable Interval Timer (PIT). + * + * Historically, PCs had a Timer component of type 8253 or 8254, modern systems come with a compatible chip. + * Each of these chips provides three 16-bit wide counters ("channel"), each running at a frequency of 1.19318 MHz. + * The timer's counting speed is thereby independent from the CPU frequency. 
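 *
 * For orientation: at the base frequency of 1.193182 MHz, a delay of 1000 us corresponds to a
 * counter value of about 1193182 * 1000 / 1000000 = 1193 ticks, and the 16-bit counter (at most
 * 65535) limits a single timeout to roughly 65535 / 1.193182 MHz = 54.9 ms -- which is why
 * PIT::set() rejects longer waiting times.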
+ * + * Traditionally, the first counter (channel 0) was used for triggering interrupts, the second one (channel 1) controlled + * the memory refresh, and the third counter (channel 2) was assigned to the PC speaker. + * + * As the PIT's frequency is fixed to a constant value of 1.19318 MHz, the PIT can be used for calibration. + * For this purpose, we use channel 2 only. + * + * \note Interrupts should be disabled while configuring the timer. + */ +namespace PIT { + +/*! \brief Start timer + * + * Sets the channel 2 timer to the provided value and starts counting. + * + * \note The maximum waiting time is approx. 55 000 us due to the timers being limited to 16 bit. + * \param us Waiting time in us + * \return `true` if the counter is running; `false` if the waiting time exceeds the limits. + */ +bool set(uint16_t us); + +/*! \brief Reads the current timer value + * \return Current timer value + */ +uint16_t get(void); + +/*! \brief Check if the timer is running + * \return `true` if running, `false` otherwise + */ +bool isActive(void); + +/*! \brief (Active) waiting for timeout + * \return `true` when timeout was successfully hit, `false` if the timer was not active prior to calling. + */ +bool waitForTimeout(void); + +/*! \brief Set the timer and wait for timeout + * \note The maximum waiting time is approx. 55 000 us due to the timers being limited to 16 bit. + * \param us Waiting time in us + * \return `true` when waiting successfully terminated; `false` on error (e.g., waiting time exceeds its limits) + */ +bool delay(uint16_t us); + +/*! \brief Play a given frequency on the PC speaker. + * + * As the PC speaker is connected to PIT channel 2, the PIT can be used to play an acoustic signal. + * Playing sounds occupies the PIT, so it cannot be used for other purposes while playback. + * + * \note Not every PC has an activated PC speaker + * \note Qemu & KVM have to be launched with `-audiodev` + * If you still cannot hear anything, try to set `QEMU_AUDIO_DRV` to `alsa` + * (by launching \StuBS with `QEMU_AUDIO_DRV=alsa make kvm`) + * \param freq Frequency (in Hz) of the sound to be played, or 0 to deactivate playback. + */ +void pcspeaker(uint32_t freq); + +/*! \brief Deactivate the timer + */ +void disable(void); + +} // namespace PIT diff --git a/machine/ps2controller.cc b/machine/ps2controller.cc new file mode 100644 index 0000000..27d5c41 --- /dev/null +++ b/machine/ps2controller.cc @@ -0,0 +1,118 @@ +#include "machine/ps2controller.h" +#include "machine/keydecoder.h" +#include "machine/ioport.h" +#include "debug/output.h" +#include "compiler/fix.h" + +namespace PS2Controller { + +// I/O Ports of the PS2 Controller +static const IOPort ctrl_port(0x64); ///< Access status- (read) and command (write) register +static const IOPort data_port(0x60); ///< Access PS/2 device [keyboard] output- (read) and input (write) buffer +/* The buffers are used to communicate with the controller or the connected + * PS/2 devices alike: + * - For the output buffer, the controller decides to which PS/2 device the + * data gets forwarded to -- by default it is the primary PS/2 device (keyboard). + * - The source device from which the data was gathered can be determined using + * the status flag (\ref IS_MOUSE). + * + * Please also note, that the naming of the buffer may be a bit contra-intuitive + * since it is the perspective of the PS/2 controller due to historical reasons. 
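 *
 * As a sketch (not a complete solution to the exercise below), communication therefore usually
 * follows this polling pattern:
 *
 *   // writing: wait until the input buffer is free, then write
 *   while ((ctrl_port.inb() & INPUT_PENDING) != 0) {}
 *   data_port.outb(value);
 *
 *   // reading: only take a byte if output is available and it does not come from the mouse
 *   uint8_t status = ctrl_port.inb();
 *   if ((status & HAS_OUTPUT) != 0 && (status & IS_MOUSE) == 0) {
 *       uint8_t code = data_port.inb();
 *   }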
+ */ + +// Key decoder (stores the state of the modifier keys) +static KeyDecoder key_decoder; + +// To store the current state of the Keyboard LEDs +static uint8_t MAYBE_UNUSED leds = 0; + +/*! \brief Flags in the PS/2 controller status register + */ +enum Status { + HAS_OUTPUT = 1 << 0, ///< Output buffer non-empty? + INPUT_PENDING = 1 << 1, ///< Is input buffer full? + SYSTEM_FLAG = 1 << 2, ///< set on soft reset, cleared on power up + IS_COMMAND = 1 << 3, ///< Is command Byte? (otherwise data) + IS_MOUSE = 1 << 5, ///< Mouse output has data + TIMEOUT_ERROR = 1 << 6, ///< Timeout error + PARITY_ERROR = 1 << 7 ///< Parity error +}; + +/*! \brief Commands to be send to the Keyboard + */ +enum KeyboardCommand : uint8_t { + KEYBOARD_SET_LED = 0xed, ///< Set the LED (according to the following parameter byte) + KEYBOARD_SEND_ECHO = 0xee, ///< Send an echo packet + KEYBOARD_SET_SPEED = 0xf3, ///< Set the repeat rate (according to the following parameter byte) + KEYBOARD_ENABLE = 0xf4, ///< Enable Keyboard + KEYBOARD_DISABLE = 0xf5, ///< Disable Keyboard + KEYBOARD_SET_DEFAULT = 0xf6, ///< Load defaults +}; + +/*! \brief Replies + */ +enum Reply { + ACK = 0xfa, ///< Acknowledgement + RESEND = 0xfe, ///< Request to resend (not required to implement) + ECHO = 0xee ///< Echo answer +}; + +/*! \brief Commands for the PS/2 Controller + * + * These commands are processed by the controller and *not* send to keyboard/mouse. + * They have to be written into the command register. + */ +enum ControllerCommand { + CONTROLLER_GET_COMMAND_BYTE = 0x20, ///< Read Command Byte of PS/2 Controller + CONTROLLER_SET_COMMAND_BYTE = 0x60, ///< Write Command Byte of PS/2 Controller + CONTROLLER_MOUSE_DISABLE = 0xa7, ///< Disable mouse interface + CONTROLLER_MOUSE_ENABLE = 0xa8, ///< Enable mouse interface + CONTROLLER_KEYBOARD_DISABLE = 0xad, ///< Disable keyboard interface + CONTROLLER_KEYBOARD_ENABLE = 0xae, ///< Enable keyboard interface + CONTROLLER_SEND_TO_MOUSE = 0xd4, ///< Send parameter to mouse device +}; + +/*! \brief Send a command or data to a connected PS/2 device + * + * The value must only be written into the input buffer after the previously + * written values have been fetched (\ref INPUT_PENDING in the status register). + * + * \todo Implement method + * + * \param value data to be sent + */ +static void MAYBE_UNUSED sendData(uint8_t value) { + // TODO: You have to implement this method + (void) value; +} + +void init() { + + // Switch all LEDs off (on many PCs NumLock is turned on after power up) + setLed(LED_CAPS_LOCK, false); + setLed(LED_SCROLL_LOCK, false); + setLed(LED_NUM_LOCK, false); + + // Set to maximum speed & minimum delay + setRepeatRate(SPEED_30_0CPS, DELAY_250MS); +} + +bool fetch(Key &pressed) { + // TODO: You have to implement this method + (void) pressed; + return false; +} + +void setRepeatRate(Speed speed, Delay delay) { + // TODO: You have to implement this method. Use sendData() + (void) speed; + (void) delay; +} + +void setLed(enum LED led, bool on) { + // TODO: You have to implement this method. Use sendData() + (void) led; + (void) on; +} + +} // namespace PS2Controller diff --git a/machine/ps2controller.h b/machine/ps2controller.h new file mode 100644 index 0000000..87b9742 --- /dev/null +++ b/machine/ps2controller.h @@ -0,0 +1,127 @@ +/*! \file + * \brief \ref PS2Controller "PS/2 Controller" (Intel 8042, also known as Keyboard Controller) + */ + +#pragma once + +#include "types.h" +#include "object/key.h" + +/*! 
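\brief Typical use of this module (sketch)
 *
 * Purely illustrative -- details depend on how interrupt handling is wired up later:
 * \code
 *   PS2Controller::init();
 *   Key k;
 *   if (PS2Controller::fetch(k)) {
 *       // k now describes the pressed key (scan code plus modifier state)
 *   }
 * \endcode
 */

/*! 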
\brief PS/2 Controller + * \ingroup io + * + * Initializes the PS/2 devices (Keyboard and optional Mouse), and + * determines both the scan code and ASCII character of a pressed key from the + * transmitted make and break codes using the \ref KeyDecoder. + * + * \note This controller is also known as Intel 8042 (nowadays integrated in + * the mainboard) or *Keyboard Controller*. + * But to avoid confusion with the actual Keyboard and since we use the + * PS/2-compatible mode to support the Mouse as well, the name + * PS/2 Controller was chosen for the sake of simplicity. + * + * \note Since modern PCs sometimes don't have a PS/2 connector, USB keyboards + * and mice are emulated as a PS/2 device with USB Legacy Support. + */ +namespace PS2Controller { +/*! \brief Initialization of connected devices + * + * All status LEDs of the keyboard are switched off and the repetition rate is + * set to maximum speed. + */ +void init(); + +/*! \brief Retrieve the keyboard event + * + * Retrieves make and break events from the keyboard. + * If a valid (non-special) key was pressed, it is decoded + * using \ref KeyDecoder::decode into a \ref Key object. + * Events on special keys like \key{Shift}, \key{Alt}, \key{CapsLock} etc. are stored + * (in \ref KeyDecoder) and applied to subsequent keystrokes, + * while no valid key is returned for the modifier event itself. + * + * Mouse events are ignored. + * + * \todo Implement Method + * \param pressed Reference to an object which will contain the pressed \ref Key on success + * \return `true` if a valid key was decoded + */ +bool fetch(Key &pressed); + +/*! \brief Delay before the keyboard starts repeating a pressed key + */ +enum Delay { + DELAY_250MS = 0, ///< Delay of 0.25s + DELAY_500MS = 1, ///< Delay of 0.5s + DELAY_750MS = 2, ///< Delay of 0.75s + DELAY_1000MS = 3 ///< Delay of 1s +}; + +/*! 
\brief Repeat Rate of Characters + * + * \see \ref ps2keyboard + */ +enum Speed { + SPEED_30_0CPS = 0x00, ///< 30 characters per second + SPEED_26_7CPS = 0x01, ///< 26.7 characters per second + SPEED_24_0CPS = 0x02, ///< 24 characters per second + SPEED_21_8CPS = 0x03, ///< 12.8 characters per second + SPEED_20_7CPS = 0x04, ///< 20.7 characters per second + SPEED_18_5CPS = 0x05, ///< 18.5 characters per second + SPEED_17_1CPS = 0x06, ///< 17.1 characters per second + SPEED_16_0CPS = 0x07, ///< 16 characters per second + SPEED_15_0CPS = 0x08, ///< 15 characters per second + SPEED_13_3CPS = 0x09, ///< 13.3 characters per second + SPEED_12_0CPS = 0x0a, ///< 12 characters per second + SPEED_10_9CPS = 0x0b, ///< 10.9 characters per second + SPEED_10_0CPS = 0x0c, ///< 10 characters per second + SPEED_09_2CPS = 0x0d, ///< 9.2 characters per second + SPEED_08_6CPS = 0x0e, ///< 8.6 characters per second + SPEED_08_0CPS = 0x0f, ///< 8 characters per second + SPEED_07_5CPS = 0x10, ///< 7.5 characters per second + SPEED_06_7CPS = 0x11, ///< 6.7 characters per second + SPEED_06_0CPS = 0x12, ///< 6 characters per second + SPEED_05_5CPS = 0x13, ///< 5.5 characters per second + SPEED_05_0CPS = 0x14, ///< 5 characters per second + SPEED_04_6CPS = 0x15, ///< 4.6 characters per second + SPEED_04_3CPS = 0x16, ///< 4.3 characters per second + SPEED_04_0CPS = 0x17, ///< 4 characters per second + SPEED_03_7CPS = 0x18, ///< 3.7 characters per second + SPEED_03_3CPS = 0x19, ///< 3.3 characters per second + SPEED_03_0CPS = 0x1a, ///< 3 characters per second + SPEED_02_7CPS = 0x1b, ///< 2.7 characters per second + SPEED_02_5CPS = 0x1c, ///< 2.5 characters per second + SPEED_02_3CPS = 0x1d, ///< 2.3 characters per second + SPEED_02_1CPS = 0x1e, ///< 2.1 characters per second + SPEED_02_0CPS = 0x1f, ///< 2 characters per second +}; + +/*! \brief Configure the repeat rate of the keyboard + * + * \param delay configures how long a key must be pressed before the repetition begins. + * \param speed determines how fast the key codes should follow each other. + * Valid values are between `0` (30 characters per second) and + * `31` (2 characters per second). + * + * \todo Implement method + */ +void setRepeatRate(Speed speed, Delay delay); + +/*! \brief Keyboard LEDs + */ +enum LED { + LED_SCROLL_LOCK = 1 << 0, ///< Scroll Lock + LED_NUM_LOCK = 1 << 1, ///< Num Lock + LED_CAPS_LOCK = 1 << 2, ///< Caps Lock +}; + +/*! 
+/*! \brief Keyboard LEDs
+ */
+enum LED {
+	LED_SCROLL_LOCK = 1 << 0,  ///< Scroll Lock
+	LED_NUM_LOCK    = 1 << 1,  ///< Num Lock
+	LED_CAPS_LOCK   = 1 << 2,  ///< Caps Lock
+};
+
+/*! \brief Enable or disable a keyboard LED
+ *
+ * \param led LED to enable or disable
+ * \param on  `true` will enable the specified LED, `false` will disable it
+ *
+ * \todo Implement method
+ */
+void setLed(enum LED led, bool on);
+
+} // namespace PS2Controller
diff --git a/machine/serial.cc b/machine/serial.cc
new file mode 100644
index 0000000..fa57ae0
--- /dev/null
+++ b/machine/serial.cc
@@ -0,0 +1,35 @@
+#include "machine/serial.h"
+
+Serial::Serial(ComPort port, BaudRate baud_rate, DataBits data_bits, StopBits stop_bits, Parity parity) : port(port) {
+	// TODO: Implement
+	(void) baud_rate;
+	(void) data_bits;
+	(void) stop_bits;
+	(void) parity;
+}
+
+void Serial::writeReg(RegisterIndex reg, char out) {
+	// TODO: Implement (if you want, optional exercise)
+	(void) reg;
+	(void) out;
+}
+
+char Serial::readReg(RegisterIndex reg) {
+	// TODO: Implement (if you want, optional exercise)
+	(void) reg;
+	return '\0';
+}
+
+int Serial::write(char out, bool blocking) {
+	// TODO: Implement (if you want, optional exercise)
+	(void) out;
+	(void) blocking;
+	return 0;
+}
+
+int Serial::read(bool blocking) {
+	// TODO: Implement (if you want, optional exercise)
+	(void) blocking;
+	return 0;
+}
+
diff --git a/machine/serial.h b/machine/serial.h
new file mode 100644
index 0000000..f5467f6
--- /dev/null
+++ b/machine/serial.h
@@ -0,0 +1,209 @@
+/*! \file
+ * \brief Communication via the \ref Serial interface (RS-232)
+ */
+
+#pragma once
+
+#include "types.h"
+
+/*! \brief Serial interface.
+ * \ingroup io
+ *
+ * This class provides a serial interface (COM1 - COM4) for communication with the outside world.
+ *
+ * The first IBM PC used the external [8250 UART](https://de.wikipedia.org/wiki/NSC_8250) chip; in today's
+ * systems this functionality is commonly integrated into the southbridge, but has remained compatible.
+ *
+ * \see [PC8250A Data Sheet](uart-8250a.pdf#page=11) (Registers on page 11)
+ * \see [PC16550D Data Sheet](uart-16550d.pdf#page=16) (Successor, for optional FIFO buffer, page 16)
+ */
+
+class Serial {
+ public:
+	/*! \brief COM-Port
+	 *
+	 * The serial interface and its hardware addresses. Modern desktop PCs have
+	 * at most a single physical COM port (`COM1`).
+	 */
+	enum ComPort {
+		COM1 = 0x3f8,
+		COM2 = 0x2f8,
+		COM3 = 0x3e8,
+		COM4 = 0x2e8,
+	};
+
+	/*! \brief Transmission speed
+	 *
+	 * The unit Baud describes the transmission speed as the number of symbols per second;
+	 * 1 Baud therefore equals the transmission of 1 symbol per second.
+	 * The possible baud rates are whole-number divisors of the base clock frequency of 115200 Hz.
+	 */
+	enum BaudRate {
+		BAUD_300    = 384,
+		BAUD_600    = 192,
+		BAUD_1200   = 96,
+		BAUD_2400   = 48,
+		BAUD_4800   = 24,
+		BAUD_9600   = 12,
+		BAUD_19200  = 6,
+		BAUD_38400  = 3,
+		BAUD_57600  = 2,
+		BAUD_115200 = 1,
+	};
+
+	/*! \brief Number of data bits per character */
+	enum DataBits {
+		DATA_5BIT = 0,
+		DATA_6BIT = 1,
+		DATA_7BIT = 2,
+		DATA_8BIT = 3,
+	};
+
+	/*! \brief Number of stop bits per character */
+	enum StopBits {
+		STOP_1BIT   = 0,
+		STOP_1_5BIT = 4,
+		STOP_2BIT   = 4,
+	};
+
+	/*! \brief Parity bit */
+	enum Parity {
+		PARITY_NONE  = 0,
+		PARITY_ODD   = 8,
+		PARITY_EVEN  = 24,
+		PARITY_MARK  = 40,
+		PARITY_SPACE = 56,
+	};
+
+ private:
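+	// Implementation sketch (an assumption about one possible approach, following
+	// the 8250 data sheet referenced above): the public enum values are chosen so
+	// that a constructor could program the UART directly, writing the BaudRate
+	// value into the divisor latch and combining data bits, stop bits and parity
+	// into the Line Control Register:
+	//
+	//   writeReg(LINE_CONTROL_REGISTER, DIVISOR_LATCH_ACCESS_BIT);        // DLAB = 1
+	//   writeReg(DIVISOR_LOW_REGISTER,  baud_rate & 0xff);
+	//   writeReg(DIVISOR_HIGH_REGISTER, (baud_rate >> 8) & 0xff);
+	//   writeReg(LINE_CONTROL_REGISTER, data_bits | stop_bits | parity);  // DLAB = 0
+
+	/*!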
\brief register index */ + enum RegisterIndex { + // if Divisor Latch Access Bit [DLAB] = 0 + RECEIVE_BUFFER_REGISTER = 0, ///< read only + TRANSMIT_BUFFER_REGISTER = 0, ///< write only + INTERRUPT_ENABLE_REGISTER = 1, + + // if Divisor Latch Access Bit [DLAB] = 1 + DIVISOR_LOW_REGISTER = 0, + DIVISOR_HIGH_REGISTER = 1, + + // (irrespective from DLAB) + INTERRUPT_IDENT_REGISTER = 2, ///< read only + FIFO_CONTROL_REGISTER = 2, ///< write only -- 16550 and newer (esp. not 8250a) + LINE_CONTROL_REGISTER = 3, ///< highest-order bit is DLAB (see above) + MODEM_CONTROL_REGISTER = 4, + LINE_STATUS_REGISTER = 5, + MODEM_STATUS_REGISTER = 6 + }; + + /*! \brief Mask for the respective register */ + enum RegisterMask : uint8_t { + // Interrupt Enable Register + RECEIVED_DATA_AVAILABLE = 1 << 0, + TRANSMITTER_HOLDING_REGISTER_EMPTY = 1 << 1, + RECEIVER_LINE_STATUS = 1 << 2, + MODEM_STATUS = 1 << 3, + + // Interrupt Ident Register + INTERRUPT_PENDING = 1 << 0, ///< 0 means interrupt pending + INTERRUPT_ID_0 = 1 << 1, + INTERRUPT_ID_1 = 1 << 2, + + // FIFO Control Register + ENABLE_FIFO = 1 << 0, ///< 0 means disabled ^= conforming to 8250a + CLEAR_RECEIVE_FIFO = 1 << 1, + CLEAR_TRANSMIT_FIFO = 1 << 2, + DMA_MODE_SELECT = 1 << 3, + TRIGGER_RECEIVE = 1 << 6, + + // Line Control Register + // bits per character: 5 6 7 8 + WORD_LENGTH_SELECT_0 = 1 << 0, // Setting Select0: 0 1 0 1 + WORD_LENGTH_SELECT_1 = 1 << 1, // Setting Select1: 0 0 1 1 + NUMBER_OF_STOP_BITS = 1 << 2, // 0 ≙ one stop bit, 1 ≙ 1.5/2 stop bits + PARITY_ENABLE = 1 << 3, + EVEN_PARITY_SELECT = 1 << 4, + STICK_PARITY = 1 << 5, + SET_BREAK = 1 << 6, + DIVISOR_LATCH_ACCESS_BIT = 1 << 7, // DLAB + + // Modem Control Register + DATA_TERMINAL_READY = 1 << 0, + REQUEST_TO_SEND = 1 << 1, + OUT_1 = 1 << 2, + OUT_2 = 1 << 3, // must be set for interrupts! + LOOP = 1 << 4, + + // Line Status Register + DATA_READY = 1 << 0, // Set when there is a value in the receive buffer + OVERRUN_ERROR = 1 << 1, + PARITY_ERROR = 1 << 2, + FRAMING_ERROR = 1 << 3, + BREAK_INTERRUPT = 1 << 4, + TRANSMITTER_HOLDING_REGISTER = 1 << 5, + TRANSMITTER_EMPTY = 1 << 6, // Send buffer empty (ready to send) + + // Modem Status Register + DELTA_CLEAR_TO_SEND = 1 << 0, + DELTA_DATA_SET_READY = 1 << 1, + TRAILING_EDGE_RING_INDICATOR = 1 << 2, + DELTA_DATA_CARRIER_DETECT = 1 << 3, + CLEAR_TO_SEND = 1 << 4, + DATA_SET_READY = 1 << 5, + RING_INDICATOR = 1 << 6, + DATA_CARRIER_DETECT = 1 << 7 + }; + + /*! \brief Read value from register + * + * \todo Implement Method + * + * \param reg Register index + * \return The value read from register + */ + char readReg(RegisterIndex reg); + + /*! \brief Write value to register + * + * \todo Implement Method + * + * \param reg Register index + * \param out value to be written + */ + void writeReg(RegisterIndex reg, char out); + + protected: + /*! \brief Selected COM port */ + const ComPort port; + + public: + /*! \brief Constructor + * + * Creates a Serial object that encapsulates the used COM port, as well as the parameters used for the + * serial connection. Default values are `8N1` (8 bit, no parity bit, one stop bit) with 115200 Baud using COM1. + * + * \todo Implement Constructor + */ + explicit Serial(ComPort port = COM1, BaudRate baud_rate = BAUD_115200, DataBits data_bits = DATA_8BIT, + StopBits stop_bits = STOP_1BIT, Parity parity = PARITY_NONE); + + /*! 
\brief Read one byte from the serial interface + * + * \todo Implement Method + * + * \param blocking If set, \ref read() blocks until one byte was read + * \return Value read from serial interface (or `-1 ` if non-blocking and no data ready) + */ + int read(bool blocking = true); + + /*! \brief Write one byte to the serial interface + * + * \todo Implement Method + * + * \param out Byte to be written + * \param blocking If set, \ref write() blocks until the byte was written + * \return Byte written (or `-1` if writing byte failed) + */ + int write(char out, bool blocking = true); + +}; diff --git a/machine/system.cc b/machine/system.cc new file mode 100644 index 0000000..e6b606c --- /dev/null +++ b/machine/system.cc @@ -0,0 +1,15 @@ +#include "machine/system.h" +#include "machine/cmos.h" +#include "machine/ioport.h" +#include "debug/output.h" + +namespace System { + +void reboot() { + const IOPort system_control_port_a(0x92); + DBG_VERBOSE << "rebooting smp" << endl; + CMOS::write(CMOS::REG_STATUS_SHUTDOWN, 0); + system_control_port_a.outb(0x3); +} + +} // namespace System diff --git a/machine/system.h b/machine/system.h new file mode 100644 index 0000000..6557e33 --- /dev/null +++ b/machine/system.h @@ -0,0 +1,14 @@ +/*! \file + * \brief General \ref System functionality (\ref System::reboot "reboot") + */ + +#pragma once + +/*! \brief General System functions + */ +namespace System { + +/*! \brief Perform a reboot + */ +void reboot(); +} // namespace System diff --git a/machine/textmode.h b/machine/textmode.h new file mode 100644 index 0000000..b20342a --- /dev/null +++ b/machine/textmode.h @@ -0,0 +1,119 @@ +/*! \file + * \brief \ref TextMode provides a basic interface to display a character in VGA-compatible text mode + */ + +#pragma once + +#include "types.h" + +/*! \brief Basic operations in the VGA-compatible text mode + * \ingroup io + * + * This class provides an interface to access the screen in text mode, with + * access directly on the hardware level, i.e. the video memory and the + * I/O ports of the graphics card. + */ +class TextMode { + public: + static const unsigned ROWS = 25; ///< Visible rows in text mode + static const unsigned COLUMNS = 80; ///< Visible columns in text mode + + /*! \brief CGA color palette + * + * Colors for the attribute byte. + * All 16 colors can be used for the foreground while the background colors + * are limited to the first eight (from`BLACK` to `LIGHT_GREY`) + */ + enum Color { + BLACK, ///< Black (fore- and background) + BLUE, ///< Blue (fore- and background) + GREEN, ///< Green (fore- and background) + CYAN, ///< Cyan (fore- and background) + RED, ///< Red (fore- and background) + MAGENTA, ///< Magenta (fore- and background) + BROWN, ///< Brown (fore- and background) + LIGHT_GREY, ///< Light grey (fore- and background) + DARK_GREY, ///< Dark grey (foreground only) + LIGHT_BLUE, ///< Light blue (foreground only) + LIGHT_GREEN, ///< Light green (foreground only) + LIGHT_CYAN, ///< Light cyan (foreground only) + LIGHT_RED, ///< Light red (foreground only) + LIGHT_MAGENTA, ///< Light magenta (foreground only) + YELLOW, ///< Yellow (foreground only) + WHITE ///< White (foreground only) + }; + + /*! \brief Structure of a character attribute + * consists of 4 bit fore- and 3 bit background color, and a single blink bit. + * + * [Bit fields](https://en.cppreference.com/w/cpp/language/bit_field) can + * notably simplify the access and code readability. 
+ * + * \note [Type punning](https://en.wikipedia.org/wiki/Type_punning#Use_of_union) + * is indeed undefined behavior in C++. However, *gcc* explicitly allows this construct as a + * [language extension](https://gcc.gnu.org/bugs/#nonbugs). + * Some compilers ([other than gcc](https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Type%2Dpunning) + * might allow this feature only by disabling strict aliasing (`-fno-strict-aliasing`). + * In \StuBS we use this feature extensively due to the improved code readability. + */ + union Attribute { + struct { + uint8_t foreground : 4; ///< `.... XXXX` Foreground color + uint8_t background : 3; ///< `.XXX ....` Background color + uint8_t blink : 1; ///< `X... ....` Blink + } __attribute__((packed)); + uint8_t value; ///< combined value + + /*! \brief Attribute constructor (with default values) + * + * \todo Complete constructor + * + * \param foreground Foreground color (Default: \ref LIGHT_GREY) + * \param background Background color (Default: \ref BLACK) + * \param blink Blink if `true` (default: no blinking) + */ + explicit Attribute(Color foreground = LIGHT_GREY, Color background = BLACK, bool blink = false) + { //NOLINT + (void) foreground; + (void) background; + (void) blink; + } + + } __attribute__((packed)); // prevent padding by the compiler + + /*! \brief Set the keyboard hardware cursor to absolute screen position + * + * \todo Implement the method using \ref IOPort + * + * \param abs_x absolute column of the keyboard hardware cursor + * \param abs_y absolute row of the keyboard hardware cursor + */ + static void setCursor(unsigned abs_x, unsigned abs_y); + + /*! \brief Retrieve the keyboard hardware cursor position on screen + * + * \todo Implement the method using the \ref IOPort + * + * \param abs_x absolute column of the keyboard hardware cursor + * \param abs_y absolute row of the keyboard hardware cursor + */ + static void getCursor(unsigned& abs_x, unsigned& abs_y); + + /*! \brief Basic output of a character at a specific position on the screen. + * + * This method outputs the given character at the absolute screen position + * (`x`, `y`) with the specified color attribute. + * + * The position (`0`,`0`) indicates the upper left corner of the screen. + * The attribute defines characteristics such as background color, + * foreground color and blinking. + * + * \param abs_x Column (`abs_x` < \ref COLUMNS) in which the character should be displayed + * \param abs_y Row (`abs_y` < \ref ROWS) in which the character should be displayed + * \param character Character to be displayed + * \param attrib Attribute with color settings + * \todo Implement the method + */ + static void show(unsigned abs_x, unsigned abs_y, char character, Attribute attrib = Attribute()); + +}; diff --git a/machine/textwindow.h b/machine/textwindow.h new file mode 100644 index 0000000..1ac98c9 --- /dev/null +++ b/machine/textwindow.h @@ -0,0 +1,112 @@ +/*! \file + * \brief \ref TextWindow provides virtual output windows in text mode + */ + +#pragma once + +#include "types.h" +#include "machine/textmode.h" + +/*! \brief Virtual windows in text mode + * \ingroup io + * + * Outputs text on a part of the screen in \ref TextMode "text mode", + * a window defined in its position and size (with its own cursor). + * + * This allows to separate the output of the application from the debug output + * on the screen without having to synchronize. 
+ */ +class TextWindow : public TextMode { + // Prevent copies and assignments + TextWindow(const TextWindow&) = delete; + TextWindow& operator=(const TextWindow&) = delete; + + public: + /*! \brief Constructor of a text window + * + * Creates a virtual, rectangular text window on the screen. + * The coordinates to construct the window are absolute positions in the + * \ref TextMode screen. + * + * \note Overlapping windows are neither supported nor prevented -- better + * just try to avoid construction windows with overlapping coordinates! + * + * \warning Don't use the hardware cursor in more than one window! + * + * \param from_col Text Window starts in column `from_col`, + * the first (leftmost) possible column is `0` + * \param to_col Text Window extends to the right to column `to_col` (exclusive). + * This column has to be strictly greater than `from_col`, + * the maximum allowed value is \ref TextMode::COLUMNS (rightmost) + * \param from_row Text Window starts in row `from_row`, + * the first possible (uppermost) row is `0` + * \param to_row Text Window extends down to row `to_row` (exclusive). + * This row has to be strictly greater than `from_row`, + * the maximum allowed value is \ref TextMode::ROWS (bottom-most) + * \param use_cursor Specifies whether the hardware cursor (`true`) or a + * software cursor/variable (`false`) should be used to + * store the current position + * + * \todo Implement constructor + */ + TextWindow(unsigned from_col, unsigned to_col, unsigned from_row, unsigned to_row, bool use_cursor = false); + + /*! \brief Set the cursor position in the window + * + * Depending on the constructor parameter `use_cursor` either the + * hardware cursor (and only the hardware cursor!) is used or the position + * is stored internally in the object. + * + * The coordinates are relative to the upper left starting position of + * the window. + * + * \param rel_x Column in window + * \param rel_y Row in window + * \todo Implement method, use \ref TextMode::setCursor() for the hardware cursor + */ + void setPos(unsigned rel_x, unsigned rel_y); + + /*! \brief Get the current cursor position in the window + * + * Depending on the constructor parameter `use_cursor` either the + * hardware cursor (and only the hardware cursor!) is used or the position + * is retrieved from the internally stored object. + * + * \param rel_x Column in window + * \param rel_y Row in window + * \todo Implement Method, use \ref TextMode::getCursor() for the hardware cursor + */ + void getPos(unsigned& rel_x, unsigned& rel_y) const; + + /*! \brief Display multiple characters in the window + * + * Output a character string, starting at the current cursor position. + * Since the string does not need to contain a `\0` termination (unlike the + * common C string), a length parameter is required to specify the number + * of characters in the string. + * When the output is complete, the cursor is positioned after the last + * printed character. + * The same attributes (colors) are used for the entire text. + * + * If there is not enough space left at the end of the line, + * the output continues on the following line. + * As soon as the last window line is filled, the entire window area is + * moved up one line: The first line disappears, the bottom line is cleared. + * + * A line break also occurs whenever the character `\n` appears in the text. 
+	 *
+	 * \param string Text to be printed
+	 * \param length Length of text
+	 * \param attrib Attribute for text
+	 * \todo Implement Method
+	 */
+	void print(const char* string, size_t length, Attribute attrib = TextMode::Attribute());  //NOLINT
+
+	/*! \brief Delete all contents in the window and reset the cursor.
+	 *
+	 * \param character Fill character
+	 * \param attrib Attribute for fill character
+	 * \todo Implement Method
+	 */
+	void reset(char character = ' ', Attribute attrib = TextMode::Attribute());
+};
diff --git a/main.cc b/main.cc
new file mode 100644
index 0000000..c1f1bb6
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,25 @@
+#include "boot/startup_ap.h"
+#include "machine/lapic.h"
+#include "debug/output.h"
+
+const char * os_name = "MP" "StuBS";
+
+// Main function (the bootstrap processor starts here)
+extern "C" int main() {
+
+	unsigned int num_cpus = Core::count();
+	DBG_VERBOSE << "Number of CPUs: " << num_cpus << endl;
+
+	// Start application processors
+	ApplicationProcessor::boot();
+
+	return 0;
+}
+
+// Main function for application processors
+extern "C" int main_ap() {
+	DBG_VERBOSE << "CPU core " << static_cast<int>(Core::getID())
+	            << " / LAPIC " << static_cast<int>(LAPIC::getID()) << " in main_ap()" << endl;
+
+	return 0;
+}
diff --git a/object/key.cc b/object/key.cc
new file mode 100644
index 0000000..146bbd3
--- /dev/null
+++ b/object/key.cc
@@ -0,0 +1,123 @@
+#include "object/key.h"
+
+// Character table for scan codes (German keyboard layout)
+static struct {
+	const unsigned char normal,  // Character without modifiers
+	                    shift,   // Character with pressed Shift, Capslock, or in Numpad
+	                    alt;     // Character with pressed Alt key
+} ascii_tab[Key::Scancode::KEYS] = {
+	{ 0, 0, 0 }, // KEY_INVALID
+	{ 0, 0, 0 }, // KEY_ESCAPE
+	{ '1', '!', 0 }, // KEY_1
+	{ '2', '"', 253 }, // KEY_2
+	{ '3', 21, 0 }, // KEY_3
+	{ '4', '$', 0 }, // KEY_4
+	{ '5', '%', 0 }, // KEY_5
+	{ '6', '&', 0 }, // KEY_6
+	{ '7', '/', '{' }, // KEY_7
+	{ '8', '(', '[' }, // KEY_8
+	{ '9', ')', ']' }, // KEY_9
+	{ '0', '=', '}' }, // KEY_0
+	{ 225, '?', '\\'}, // KEY_DASH
+	{ 39, 96, 0 }, // KEY_EQUAL
+	{'\b', 0, 0 }, // KEY_BACKSPACE
+	{ 0, 0, 0 }, // KEY_TAB
+	{ 'q', 'Q', '@' }, // KEY_Q
+	{ 'w', 'W', 0 }, // KEY_W
+	{ 'e', 'E', 0 }, // KEY_E
+	{ 'r', 'R', 0 }, // KEY_R
+	{ 't', 'T', 0 }, // KEY_T
+	{ 'z', 'Z', 0 }, // KEY_Y
+	{ 'u', 'U', 0 }, // KEY_U
+	{ 'i', 'I', 0 }, // KEY_I
+	{ 'o', 'O', 0 }, // KEY_O
+	{ 'p', 'P', 0 }, // KEY_P
+	{ 129, 154, 0 }, // KEY_OPEN_BRACKET
+	{ '+', '*', '~' }, // KEY_CLOSE_BRACKET
+	{'\n', 0, 0 }, // KEY_ENTER
+	{ 0, 0, 0 }, // KEY_LEFT_CTRL
+	{ 'a', 'A', 0 }, // KEY_A
+	{ 's', 'S', 0 }, // KEY_S
+	{ 'd', 'D', 0 }, // KEY_D
+	{ 'f', 'F', 0 }, // KEY_F
+	{ 'g', 'G', 0 }, // KEY_G
+	{ 'h', 'H', 0 }, // KEY_H
+	{ 'j', 'J', 0 }, // KEY_J
+	{ 'k', 'K', 0 }, // KEY_K
+	{ 'l', 'L', 0 }, // KEY_L
+	{ 148, 153, 0 }, // KEY_SEMICOLON
+	{ 132, 142, 0 }, // KEY_APOSTROPH
+	{ '^', 248, 0 }, // KEY_GRAVE_ACCENT
+	{ 0, 0, 0 }, // KEY_LEFT_SHIFT
+	{ '#', 39, 0 }, // KEY_BACKSLASH
+	{ 'y', 'Y', 0 }, // KEY_Z
+	{ 'x', 'X', 0 }, // KEY_X
+	{ 'c', 'C', 0 }, // KEY_C
+	{ 'v', 'V', 0 }, // KEY_V
+	{ 'b', 'B', 0 }, // KEY_B
+	{ 'n', 'N', 0 }, // KEY_N
+	{ 'm', 'M', 230 }, // KEY_M
+	{ ',', ';', 0 }, // KEY_COMMA
+	{ '.', ':', 0 }, // KEY_PERIOD
+	{ '-', '_', 0 }, // KEY_SLASH
+	{ 0, 0, 0 }, // KEY_RIGHT_SHIFT
+	{ '*', '*', 0 }, // KEY_KP_STAR
+	{ 0, 0, 0 }, // KEY_LEFT_ALT
+	{ ' ', ' ', 0 }, // KEY_SPACEBAR
+	{ 0, 0, 0 }, // KEY_CAPS_LOCK
+	{ 0, 0, 0 }, // KEY_F1
+	{ 0, 0, 0 }, // KEY_F2
+	{ 0, 0, 0 }, // 
KEY_F3 + { 0, 0, 0 }, // KEY_F4 + { 0, 0, 0 }, // KEY_F5 + { 0, 0, 0 }, // KEY_F6 + { 0, 0, 0 }, // KEY_F7 + { 0, 0, 0 }, // KEY_F8 + { 0, 0, 0 }, // KEY_F9 + { 0, 0, 0 }, // KEY_F10 + { 0, 0, 0 }, // KEY_NUM_LOCK + { 0, 0, 0 }, // KEY_SCROLL_LOCK + { 0, '7', 0 }, // KEY_KP_7 + { 0, '8', 0 }, // KEY_KP_8 + { 0, '9', 0 }, // KEY_KP_9 + { '-', '-', 0 }, // KEY_KP_DASH + { 0, '4', 0 }, // KEY_KP_4 + { 0, '5', 0 }, // KEY_KP_5 + { 0, '6', 0 }, // KEY_KP_6 + { '+', '+', 0 }, // KEY_KP_PLUS + { 0, '1', 0 }, // KEY_KP_1 + { 0, '2', 0 }, // KEY_KP_2 + { 0, '3', 0 }, // KEY_KP_3 + { 0, '0', 0 }, // KEY_KP_0 + { 127, ',', 0 }, // KEY_KP_PERIOD + { 0, 0, 0 }, // KEY_SYSREQ + { 0, 0, 0 }, // KEY_EUROPE_2 + { '<', '>', '|' }, // KEY_F11 + { 0, 0, 0 }, // KEY_F12 + { 0, 0, 0 }, // KEY_KP_EQUAL +}; + +unsigned char Key::ascii() const { + // Select the correct table depending on the modifier bits. + // For the sake of simplicity, Shift and NumLock have precedence over Alt. + // The Ctrl modifier does not have a distinct table. + + if (!valid()) { + return '\0'; + } else if (shift + || (caps_lock + && ( + (scancode >= KEY_Q && scancode <= KEY_P) + || (scancode >= KEY_A && scancode <= KEY_L) + || (scancode >= KEY_Z && scancode <= KEY_M) + ) + ) + || (num_lock && scancode >= KEY_KP_7 && scancode <= KEY_KP_PERIOD) + ) { + return ascii_tab[scancode].shift; + } else if (alt()) { + return ascii_tab[scancode].alt; + } else { + return ascii_tab[scancode].normal; + } +} diff --git a/object/key.h b/object/key.h new file mode 100644 index 0000000..3d96737 --- /dev/null +++ b/object/key.h @@ -0,0 +1,177 @@ +/*! \file + * \brief \ref Key, an abstraction for handling pressed keys and their modifiers + */ + +#pragma once + +#include "types.h" + +/*! \brief Class that abstracts a key, made up of the scan code and the modifier bits. + */ +struct Key { + /*! \brief The keys' scan codes (code 1) + */ + enum Scancode : uint8_t { + // Invalid scan code + KEY_INVALID = 0, + + // "real" valid scan codes + KEY_ESCAPE, + KEY_1, + KEY_2, + KEY_3, + KEY_4, + KEY_5, + KEY_6, + KEY_7, + KEY_8, + KEY_9, + KEY_0, + KEY_DASH, + KEY_EQUAL, + KEY_BACKSPACE, + KEY_TAB, + KEY_Q, + KEY_W, + KEY_E, + KEY_R, + KEY_T, + KEY_Y, + KEY_U, + KEY_I, + KEY_O, + KEY_P, + KEY_OPEN_BRACKET, + KEY_CLOSE_BRACKET, + KEY_ENTER, + KEY_LEFT_CTRL, + KEY_A, + KEY_S, + KEY_D, + KEY_F, + KEY_G, + KEY_H, + KEY_J, + KEY_K, + KEY_L, + KEY_SEMICOLON, + KEY_APOSTROPH, + KEY_GRAVE_ACCENT, + KEY_LEFT_SHIFT, + KEY_BACKSLASH, + KEY_Z, + KEY_X, + KEY_C, + KEY_V, + KEY_B, + KEY_N, + KEY_M, + KEY_COMMA, + KEY_PERIOD, + KEY_SLASH, + KEY_RIGHT_SHIFT, + KEY_KP_STAR, + KEY_LEFT_ALT, + KEY_SPACEBAR, + KEY_CAPS_LOCK, + KEY_F1, + KEY_F2, + KEY_F3, + KEY_F4, + KEY_F5, + KEY_F6, + KEY_F7, + KEY_F8, + KEY_F9, + KEY_F10, + KEY_NUM_LOCK, + KEY_SCROLL_LOCK, + KEY_KP_7, + KEY_KP_8, + KEY_KP_9, + KEY_KP_DASH, + KEY_KP_4, + KEY_KP_5, + KEY_KP_6, + KEY_KP_PLUS, + KEY_KP_1, + KEY_KP_2, + KEY_KP_3, + KEY_KP_0, + KEY_KP_PERIOD, + KEY_SYSREQ, + KEY_EUROPE_2, + KEY_F11, + KEY_F12, + KEY_KP_EQUAL, + + // Number of keys (excluding aliases below) + KEYS, + + // aliases + KEY_DIV = KEY_7, + KEY_DEL = KEY_KP_PERIOD, + KEY_UP = KEY_KP_8, + KEY_DOWN = KEY_KP_2, + KEY_LEFT = KEY_KP_4, + KEY_RIGHT = KEY_KP_6, + }; + + Scancode scancode; + + // bit masks for the modifier keys + bool shift : 1, + alt_left : 1, + alt_right : 1, + ctrl_left : 1, + ctrl_right : 1, + caps_lock : 1, + num_lock : 1, + scroll_lock : 1; + + /*! 
\brief Default constructor: Instantiates an invalid key by setting ASCII, scan code, and modifier bits to 0 + */ + Key() : scancode(KEY_INVALID), shift(false), alt_left(false), alt_right(false), + ctrl_left(false), ctrl_right(false), + caps_lock(false), num_lock(false), scroll_lock(false) {} + + /*! \brief Invalid keys have a scancode = 0 + * \return Checks whether a key is valid. + */ + bool valid() const { + return scancode != KEY_INVALID && scancode < KEYS; + } + + /*! \brief Marks the key as invalid by setting the scan code to 0. + * + */ + void invalidate() { + scancode = KEY_INVALID; + } + + /*! \brief Get the key's ASCII value + * \return the key's ASCII value + */ + unsigned char ascii() const; + + /*! \brief Indicates whether the ALT modifier is set + * \return `true` if ALT key was pressed during key press + */ + bool alt() const { + return alt_left || alt_right; + } + + /*! \brief Indicates whether the CTRL modifier is set + * \return `true` if CTRL key was pressed during key press + */ + bool ctrl() const { + return ctrl_left || ctrl_right; + } + + /*! \brief Conversion to char (ASCII code) + * + */ + operator char() const { //NOLINT since we want implicit conversions + return static_cast(ascii()); + } +}; diff --git a/tools/build.mk b/tools/build.mk new file mode 100644 index 0000000..312196f --- /dev/null +++ b/tools/build.mk @@ -0,0 +1,143 @@ +# Build the kernel + +# Folder the generated files will be placed in. +BUILDDIR ?= .build +# Build folder suffixes +OPTTAG = -opt +NOOPTTAG = -noopt +DBGTAG = -dbg +VERBOSETAG = -verbose + +# C++ +CXX = $(PREFIX)g++ + +CXXFLAGS_ARCH = -m64 + +CXXFLAGS_DEFAULT = -std=c++14 -ffreestanding -fno-pic -nodefaultlibs -nostdlib -nostdinc -I. -fno-rtti -fno-exceptions -Wno-write-strings -fno-stack-protector -mno-red-zone -g -gdwarf-2 +CXXFLAGS_OPT = -O3 -fomit-frame-pointer +CXXFLAGS_WARNING = -Wall -Wextra -Werror -Wno-error=unused-parameter -Wno-non-virtual-dtor +CXXFLAGS_CLANG = -no-pie -Wno-error=unused-private-field -Wno-implicit-exception-spec-mismatch -Wno-error=unused-const-variable -Wno-unused-command-line-argument -Wno-unused-const-variable -fno-strict-aliasing +CXXFLAGS_GCC = -fno-tree-loop-distribute-patterns -no-pie -nostartfiles -Wstack-usage=1024 -Wno-error=stack-usage= -fno-threadsafe-statics +CXXFLAGS_NOFPU = -mno-mmx -mno-sse -mgeneral-regs-only +CXXFLAGS = $(CXXFLAGS_ARCH) $(CXXFLAGS_DEFAULT) $(CXXFLAGS_OPT) $(CXXFLAGS_NOFPU) $(CXXFLAGS_WARNING) +# Compiler specific flags +ifneq (,$(findstring clang,$(CXX))) + COMPILER := CLANG + CXXFLAGS += $(CXXFLAGS_CLANG) +else ifneq (,$(findstring g++,$(CXX))) + COMPILER := GCC + # g++ 6 does not support general-regs-only flag + ifeq "$(shell expr `$(CXX) -dumpversion | cut -f1 -d.` \<= 6)" "1" + CXXFLAGS := $(filter-out -mgeneral-regs-only,$(CXXFLAGS)) + endif + CXXFLAGS += $(CXXFLAGS_GCC) +else + COMPILER := +endif + +# Assembly +ASM = nasm +ASMFLAGS = -f elf64 + +# Additional build utilities +OBJCOPY = $(PREFIX)objcopy +STRIP = $(PREFIX)strip +AR = $(PREFIX)ar + +# C Runtime objects +CRTBEGIN_OBJECT = $(shell $(CXX) $(CXXFLAGS) --print-file-name=crtbegin.o) +CRTEND_OBJECT = $(shell $(CXX) $(CXXFLAGS) --print-file-name=crtend.o) +# GCC library + # Attention: libgcc.a must not use red-zone! 
+LIBGCC = $(shell $(CXX) $(CXXFLAGS) -print-libgcc-file-name ) + +# Subdirectories with sources +VPATH = $(sort $(dir $(CC_SOURCES) $(ASM_SOURCES))) + +# Lists of object files that are generated by compilation: +# Note that the variables containing the input files are to be defined by +# the Makefiles prior to including this common.mk. +ifdef CRTI_SOURCE + CRTI_OBJECT = $(addprefix $(BUILDDIR)/,$(addsuffix .o,$(CRTI_SOURCE))) +else + CRTI_OBJECT = $(shell $(CXX) $(CXXFLAGS) --print-file-name=crti.o) +endif +ifdef CRTN_SOURCE + CRTN_OBJECT = $(addprefix $(BUILDDIR)/,$(addsuffix .o,$(CRTN_SOURCE))) +else + CRTN_OBJECT = $(shell $(CXX) $(CXXFLAGS) --print-file-name=crtn.o) +endif +CC_OBJECTS = $(addprefix $(BUILDDIR)/,$(CC_SOURCES:.cc=.o)) +DEP_FILES = $(addprefix $(BUILDDIR)/,$(CC_SOURCES:.cc=.d) $(addsuffix .d,$(ASM_SOURCES))) +ASM_OBJECTS = $(addprefix $(BUILDDIR)/,$(addsuffix .o,$(filter-out $(CRTI_SOURCE) $(CRTN_SOURCE),$(ASM_SOURCES)))) + +# Dependency files +$(BUILDDIR)/%.d : %.cc $(MAKEFILE_LIST) + @echo "DEP $<" + @mkdir -p $(@D) + $(VERBOSE) $(CXX) $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MF $@ $< + +$(BUILDDIR)/%.asm.d : %.asm $(MAKEFILE_LIST) + @echo "DEP $<" + @mkdir -p $(@D) + $(VERBOSE) $(ASM) $(ASMFLAGS) -M -MT $(BUILDDIR)/$*.asm.o -MF $@ $< + +# Object files +$(BUILDDIR)/%.o : %.cc $(MAKEFILE_LIST) + @echo "CXX $<" + @mkdir -p $(@D) + $(VERBOSE) $(CXX) -c $(CXXFLAGS) -o $@ $< + +$(BUILDDIR)/%.asm.o : %.asm $(MAKEFILE_LIST) + @echo "ASM $<" + @mkdir -p $(@D) + $(VERBOSE) $(ASM) $(ASMFLAGS) -o $@ $< + +# The standard target 'clean' removes the whole generated system, the object files, and the dependency files. +clean:: + @echo "RM $(BUILDDIR)" + $(VERBOSE) rm -rf "$(BUILDDIR)" "$(BUILDDIR)$(OPTTAG)" "$(BUILDDIR)$(NOOPTTAG)" "$(BUILDDIR)$(DBGTAG)" "$(BUILDDIR)$(VERBOSETAG)" + +# Target issuing a nested call to make generating a fully optimized systems without assertions. +%$(OPTTAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)$(OPTTAG)" ISODIR="$(ISODIR)$(OPTTAG)" CXXFLAGS_OPT="-Ofast -fomit-frame-pointer -flto -march=westmere -DNDEBUG" $* + +# Target issuing a nested call to make generating a non-optimized system. +%$(NOOPTTAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)$(NOOPTTAG)" ISODIR="$(ISODIR)$(NOOPTTAG)" CXXFLAGS_OPT="-O0" $* + +# Target issuing a nested call to make generating a system optimized for debugging. +%$(DBGTAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)$(DBGTAG)" ISODIR="$(ISODIR)$(DBGTAG)" CXXFLAGS_OPT="-Og -fno-omit-frame-pointer" $* + +# Target issuing a nested call to make generating a system with verbose output. +%$(VERBOSETAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)$(VERBOSETAG)" ISODIR="$(ISODIR)$(VERBOSETAG)" CXXFLAGS_OPT="-DVERBOSE" $* + +# Documentation +help:: + @/bin/echo -e "" \ + "All targets exist in different flavours in addition to \e[2;3m\e[0m:\n" \ + "\e[2;3m\e[0;3m-noopt\e[0m, \e[2;3m\e[0;3m-opt\e[0m, \e[2;3m\e[0;3m-dbg\e[0m, and \e[2;3m\e[0;3m-verbose\e[0m.\n" \ + "Targets suffixed with \e[3m-noopt\e[0m are compiled without optimizations,\n" \ + "\e[3m-opt\e[0m targets produce a highly optimized binary, while\n" \ + "\e[3m-dbg\e[0m targets only use optimizations not hindering debugging.\n" \ + "Targets suffixed with \e[3m-verbose\e[0m generate binaries including\n" \ + "verbose output (via \e[3mDBG_VERBOSE\e[0m), making such targets useful for debugging.\n" \ + "To get a verbose make output, clear VERBOSE, e.g. 
\e[3mmake VERBOSE=\e[0m.\n" \ + "The following targets are available (each target can be suffixed by \e[3m-noopt\e[0m\n" \ + "and \e[3m-verbose\e[0m):\n\n" \ + " \e[3mall\e[0m Builds $(PROJECT), generating an ELF binary\n\n" + +# Print warnings, if appropriate +ifeq (,$(COMPILER)) +$(warning Unknown (and potentially unsupported) compiler "$(CXX)"!) +endif + +# Include dependency files (generated via gcc flag -MM) +ifneq ($(MAKECMDGOALS),clean) +-include $(DEP_FILES) +endif + +# Phony targets +.PHONY: clean help diff --git a/tools/common.mk b/tools/common.mk new file mode 100644 index 0000000..4e94682 --- /dev/null +++ b/tools/common.mk @@ -0,0 +1,31 @@ +# Common include Makefile + +# Hide commands +VERBOSE = @ +# Prefix for toolchain binaries +PREFIX ?= +# Project name +PROJECT ?= "MPStuBS" + +help:: + @/bin/echo -e "\n" \ + "\e[1mMAKEFILE for the teaching operating system $(PROJECT)\e[0m\n" \ + "--------------------------------------------------\n\n" \ + "Executing '\e[4mmake\e[0m' will compile the operating system from source.\n" + +# Get current directory path +CURRENT_DIR := $(dir $(lastword $(MAKEFILE_LIST))) + +# Include Makefile scripts +include $(CURRENT_DIR)/build.mk +include $(CURRENT_DIR)/qemu.mk +include $(CURRENT_DIR)/image.mk +include $(CURRENT_DIR)/linter.mk +include $(CURRENT_DIR)/remote.mk + +# Disable buitlin rules +MAKEFLAGS += --no-builtin-rules +MAKEFLAGS += --no-builtin-variables + +# Disable buitlin suffixes +.SUFFIXES: diff --git a/tools/cpplint.py b/tools/cpplint.py new file mode 100644 index 0000000..a004746 --- /dev/null +++ b/tools/cpplint.py @@ -0,0 +1,6771 @@ +#!/usr/bin/env python +# +# Copyright (c) 2009 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Does google-lint on c++ files. + +The goal of this script is to identify places in the code that *may* +be in non-compliance with google style. It does not attempt to fix +up these problems -- the point is to educate. It does also not +attempt to find all problems, or to ensure that everything it does +find is legitimately a problem. 
+ +In particular, we can get very confused by /* and // inside strings! +We do a small hack, which is to ignore //'s with "'s after them on the +same line, but it is far from perfect (in either direction). +""" + +import codecs +import copy +import getopt +import glob +import itertools +import math # for log +import os +import re +import sre_compile +import string +import sys +import sysconfig +import unicodedata +import xml.etree.ElementTree + +# if empty, use defaults +_valid_extensions = set([]) + +__VERSION__ = '1.5.2' + +try: + xrange # Python 2 +except NameError: + # -- pylint: disable=redefined-builtin + xrange = range # Python 3 + +_USAGE = """ +Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit|sed|gsed] + [--filter=-x,+y,...] + [--counting=total|toplevel|detailed] [--root=subdir] + [--repository=path] + [--linelength=digits] [--headers=x,y,...] + [--recursive] + [--exclude=path] + [--extensions=hpp,cpp,...] + [--includeorder=default|standardcfirst] + [--quiet] + [--version] + [file] ... + + Style checker for C/C++ source files. + This is a fork of the Google style checker with minor extensions. + + The style guidelines this tries to follow are those in + https://google.github.io/styleguide/cppguide.html + + Every problem is given a confidence score from 1-5, with 5 meaning we are + certain of the problem, and 1 meaning it could be a legitimate construct. + This will miss some errors, and is not a substitute for a code review. + + To suppress false-positive errors of a certain category, add a + 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) + suppresses errors of all categories on that line. + + The files passed in will be linted; at least one file must be provided. + Default linted extensions are %s. + Other file types will be ignored. + Change the extensions with the --extensions flag. + + Flags: + + output=emacs|eclipse|vs7|junit|sed|gsed + By default, the output is formatted to ease emacs parsing. Visual Studio + compatible output (vs7) may also be used. Further support exists for + eclipse (eclipse), and JUnit (junit). XML parsers such as those used + in Jenkins and Bamboo may also be used. + The sed format outputs sed commands that should fix some of the errors. + Note that this requires gnu sed. If that is installed as gsed on your + system (common e.g. on macOS with homebrew) you can use the gsed output + format. Sed commands are written to stdout, not stderr, so you should be + able to pipe output straight to a shell to run the fixes. + + verbose=# + Specify a number 0-5 to restrict errors to certain verbosity levels. + Errors with lower verbosity levels have lower confidence and are more + likely to be false positives. + + quiet + Don't print anything if no errors are found. + + filter=-x,+y,... + Specify a comma-separated list of category-filters to apply: only + error messages whose category names pass the filters will be printed. + (Category names are printed with the message and look like + "[whitespace/indent]".) Filters are evaluated left to right. + "-FOO" and "FOO" means "do not print categories that start with FOO". + "+FOO" means "do print categories that start with FOO". + + Examples: --filter=-whitespace,+whitespace/braces + --filter=whitespace,runtime/printf,+runtime/printf_format + --filter=-,+build/include_what_you_use + + To see a list of all the categories used in cpplint, pass no arg: + --filter= + + counting=total|toplevel|detailed + The total number of errors found is always printed. 
If + 'toplevel' is provided, then the count of errors in each of + the top-level categories like 'build' and 'whitespace' will + also be printed. If 'detailed' is provided, then a count + is provided for each category like 'build/class'. + + repository=path + The top level directory of the repository, used to derive the header + guard CPP variable. By default, this is determined by searching for a + path that contains .git, .hg, or .svn. When this flag is specified, the + given path is used instead. This option allows the header guard CPP + variable to remain consistent even if members of a team have different + repository root directories (such as when checking out a subdirectory + with SVN). In addition, users of non-mainstream version control systems + can use this flag to ensure readable header guard CPP variables. + + Examples: + Assuming that Alice checks out ProjectName and Bob checks out + ProjectName/trunk and trunk contains src/chrome/ui/browser.h, then + with no --repository flag, the header guard CPP variable will be: + + Alice => TRUNK_SRC_CHROME_BROWSER_UI_BROWSER_H_ + Bob => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + If Alice uses the --repository=trunk flag and Bob omits the flag or + uses --repository=. then the header guard CPP variable will be: + + Alice => SRC_CHROME_BROWSER_UI_BROWSER_H_ + Bob => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + root=subdir + The root directory used for deriving header guard CPP variable. + This directory is relative to the top level directory of the repository + which by default is determined by searching for a directory that contains + .git, .hg, or .svn but can also be controlled with the --repository flag. + If the specified directory does not exist, this flag is ignored. + + Examples: + Assuming that src is the top level directory of the repository (and + cwd=top/src), the header guard CPP variables for + src/chrome/browser/ui/browser.h are: + + No flag => CHROME_BROWSER_UI_BROWSER_H_ + --root=chrome => BROWSER_UI_BROWSER_H_ + --root=chrome/browser => UI_BROWSER_H_ + --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + linelength=digits + This is the allowed line length for the project. The default value is + 80 characters. + + Examples: + --linelength=120 + + recursive + Search for files to lint recursively. Each directory given in the list + of files to be linted is replaced by all files that descend from that + directory. Files with extensions not in the valid extensions list are + excluded. + + exclude=path + Exclude the given path from the list of files to be linted. Relative + paths are evaluated relative to the current directory and shell globbing + is performed. This flag can be provided multiple times to exclude + multiple files. + + Examples: + --exclude=one.cc + --exclude=src/*.cc + --exclude=src/*.cc --exclude=test/*.cc + + extensions=extension,extension,... + The allowed file extensions that cpplint will check + + Examples: + --extensions=%s + + includeorder=default|standardcfirst + For the build/include_order rule, the default is to blindly assume angle + bracket includes with file extension are c-system-headers (default), + even knowing this will have false classifications. + The default is established at google. + standardcfirst means to instead use an allow-list of known c headers and + treat all others as separate group of "other system headers". The C headers + included are those of the C-standard lib and closely related ones. + + headers=x,y,... + The header extensions that cpplint will treat as .h in checks. 
Values are + automatically added to --extensions list. + (by default, only files with extensions %s will be assumed to be headers) + + Examples: + --headers=%s + --headers=hpp,hxx + --headers=hpp + + cpplint.py supports per-directory configurations specified in CPPLINT.cfg + files. CPPLINT.cfg file can contain a number of key=value pairs. + Currently the following options are supported: + + set noparent + filter=+filter1,-filter2,... + exclude_files=regex + linelength=80 + root=subdir + headers=x,y,... + + "set noparent" option prevents cpplint from traversing directory tree + upwards looking for more .cfg files in parent directories. This option + is usually placed in the top-level project directory. + + The "filter" option is similar in function to --filter flag. It specifies + message filters in addition to the |_DEFAULT_FILTERS| and those specified + through --filter command-line flag. + + "exclude_files" allows to specify a regular expression to be matched against + a file name. If the expression matches, the file is skipped and not run + through the linter. + + "linelength" allows to specify the allowed line length for the project. + + The "root" option is similar in function to the --root flag (see example + above). Paths are relative to the directory of the CPPLINT.cfg. + + The "headers" option is similar in function to the --headers flag + (see example above). + + CPPLINT.cfg has an effect on files in the same directory and all + sub-directories, unless overridden by a nested configuration file. + + Example file: + filter=-build/include_order,+build/include_alpha + exclude_files=.*\\.cc + + The above example disables build/include_order warning and enables + build/include_alpha as well as excludes all .cc from being + processed by linter, in the current directory (where the .cfg + file is located) and all sub-directories. +""" + +# We categorize each error message we print. Here are the categories. +# We want an explicit list so we can list them all in cpplint --filter=. +# If you add a new error message with a new category, add it to the list +# here! cpplint_unittest.py should tell you if you forget to do this. 
+_ERROR_CATEGORIES = [ + 'build/class', + 'build/c++11', + 'build/c++14', + 'build/c++tr1', + 'build/deprecated', + 'build/endif_comment', + 'build/explicit_make_pair', + 'build/forward_decl', + 'build/header_guard', + 'build/include', + 'build/include_subdir', + 'build/include_alpha', + 'build/include_order', + 'build/include_what_you_use', + 'build/namespaces_headers', + 'build/namespaces_literals', + 'build/namespaces', + 'build/printf_format', + 'build/storage_class', + 'legal/copyright', + 'readability/alt_tokens', + 'readability/braces', + 'readability/casting', + 'readability/check', + 'readability/constructors', + 'readability/fn_size', + 'readability/inheritance', + 'readability/multiline_comment', + 'readability/multiline_string', + 'readability/namespace', + 'readability/nolint', + 'readability/nul', + 'readability/strings', + 'readability/todo', + 'readability/utf8', + 'runtime/arrays', + 'runtime/casting', + 'runtime/explicit', + 'runtime/int', + 'runtime/init', + 'runtime/invalid_increment', + 'runtime/member_string_references', + 'runtime/memset', + 'runtime/indentation_namespace', + 'runtime/operator', + 'runtime/printf', + 'runtime/printf_format', + 'runtime/references', + 'runtime/string', + 'runtime/threadsafe_fn', + 'runtime/vlog', + 'whitespace/blank_line', + 'whitespace/braces', + 'whitespace/comma', + 'whitespace/comments', + 'whitespace/empty_conditional_body', + 'whitespace/empty_if_body', + 'whitespace/empty_loop_body', + 'whitespace/end_of_line', + 'whitespace/ending_newline', + 'whitespace/forcolon', + 'whitespace/indent', + 'whitespace/line_length', + 'whitespace/newline', + 'whitespace/operators', + 'whitespace/parens', + 'whitespace/semicolon', + 'whitespace/tab', + 'whitespace/todo', + ] + +# These error categories are no longer enforced by cpplint, but for backwards- +# compatibility they may still appear in NOLINT comments. +_LEGACY_ERROR_CATEGORIES = [ + 'readability/streams', + 'readability/function', + ] + +# The default state of the category filter. This is overridden by the --filter= +# flag. By default all errors are on, so only add here categories that should be +# off by default (i.e., categories that must be enabled by the --filter= flags). +# All entries here should start with a '-' or '+', as in the --filter= flag. +_DEFAULT_FILTERS = ['-build/include_alpha'] + +# The default list of categories suppressed for C (not C++) files. +_DEFAULT_C_SUPPRESSED_CATEGORIES = [ + 'readability/casting', + ] + +# The default list of categories suppressed for Linux Kernel files. +_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ + 'whitespace/tab', + ] + +# We used to check for high-bit characters, but after much discussion we +# decided those were OK, as long as they were in UTF-8 and didn't represent +# hard-coded international strings, which belong in a separate i18n file. 
+ +# C++ headers +_CPP_HEADERS = frozenset([ + # Legacy + 'algobase.h', + 'algo.h', + 'alloc.h', + 'builtinbuf.h', + 'bvector.h', + 'complex.h', + 'defalloc.h', + 'deque.h', + 'editbuf.h', + 'fstream.h', + 'function.h', + 'hash_map', + 'hash_map.h', + 'hash_set', + 'hash_set.h', + 'hashtable.h', + 'heap.h', + 'indstream.h', + 'iomanip.h', + 'iostream.h', + 'istream.h', + 'iterator.h', + 'list.h', + 'map.h', + 'multimap.h', + 'multiset.h', + 'ostream.h', + 'pair.h', + 'parsestream.h', + 'pfstream.h', + 'procbuf.h', + 'pthread_alloc', + 'pthread_alloc.h', + 'rope', + 'rope.h', + 'ropeimpl.h', + 'set.h', + 'slist', + 'slist.h', + 'stack.h', + 'stdiostream.h', + 'stl_alloc.h', + 'stl_relops.h', + 'streambuf.h', + 'stream.h', + 'strfile.h', + 'strstream.h', + 'tempbuf.h', + 'tree.h', + 'type_traits.h', + 'vector.h', + # 17.6.1.2 C++ library headers + 'algorithm', + 'array', + 'atomic', + 'bitset', + 'chrono', + 'codecvt', + 'complex', + 'condition_variable', + 'deque', + 'exception', + 'forward_list', + 'fstream', + 'functional', + 'future', + 'initializer_list', + 'iomanip', + 'ios', + 'iosfwd', + 'iostream', + 'istream', + 'iterator', + 'limits', + 'list', + 'locale', + 'map', + 'memory', + 'mutex', + 'new', + 'numeric', + 'ostream', + 'queue', + 'random', + 'ratio', + 'regex', + 'scoped_allocator', + 'set', + 'sstream', + 'stack', + 'stdexcept', + 'streambuf', + 'string', + 'strstream', + 'system_error', + 'thread', + 'tuple', + 'typeindex', + 'typeinfo', + 'type_traits', + 'unordered_map', + 'unordered_set', + 'utility', + 'valarray', + 'vector', + # 17.6.1.2 C++14 headers + 'shared_mutex', + # 17.6.1.2 C++17 headers + 'any', + 'charconv', + 'codecvt', + 'execution', + 'filesystem', + 'memory_resource', + 'optional', + 'string_view', + 'variant', + # 17.6.1.2 C++ headers for C library facilities + 'cassert', + 'ccomplex', + 'cctype', + 'cerrno', + 'cfenv', + 'cfloat', + 'cinttypes', + 'ciso646', + 'climits', + 'clocale', + 'cmath', + 'csetjmp', + 'csignal', + 'cstdalign', + 'cstdarg', + 'cstdbool', + 'cstddef', + 'cstdint', + 'cstdio', + 'cstdlib', + 'cstring', + 'ctgmath', + 'ctime', + 'cuchar', + 'cwchar', + 'cwctype', + ]) + +# C headers +_C_HEADERS = frozenset([ + # System C headers + 'assert.h', + 'complex.h', + 'ctype.h', + 'errno.h', + 'fenv.h', + 'float.h', + 'inttypes.h', + 'iso646.h', + 'limits.h', + 'locale.h', + 'math.h', + 'setjmp.h', + 'signal.h', + 'stdalign.h', + 'stdarg.h', + 'stdatomic.h', + 'stdbool.h', + 'stddef.h', + 'stdint.h', + 'stdio.h', + 'stdlib.h', + 'stdnoreturn.h', + 'string.h', + 'tgmath.h', + 'threads.h', + 'time.h', + 'uchar.h', + 'wchar.h', + 'wctype.h', + # additional POSIX C headers + 'aio.h', + 'arpa/inet.h', + 'cpio.h', + 'dirent.h', + 'dlfcn.h', + 'fcntl.h', + 'fmtmsg.h', + 'fnmatch.h', + 'ftw.h', + 'glob.h', + 'grp.h', + 'iconv.h', + 'langinfo.h', + 'libgen.h', + 'monetary.h', + 'mqueue.h', + 'ndbm.h', + 'net/if.h', + 'netdb.h', + 'netinet/in.h', + 'netinet/tcp.h', + 'nl_types.h', + 'poll.h', + 'pthread.h', + 'pwd.h', + 'regex.h', + 'sched.h', + 'search.h', + 'semaphore.h', + 'setjmp.h', + 'signal.h', + 'spawn.h', + 'strings.h', + 'stropts.h', + 'syslog.h', + 'tar.h', + 'termios.h', + 'trace.h', + 'ulimit.h', + 'unistd.h', + 'utime.h', + 'utmpx.h', + 'wordexp.h', + # additional GNUlib headers + 'a.out.h', + 'aliases.h', + 'alloca.h', + 'ar.h', + 'argp.h', + 'argz.h', + 'byteswap.h', + 'crypt.h', + 'endian.h', + 'envz.h', + 'err.h', + 'error.h', + 'execinfo.h', + 'fpu_control.h', + 'fstab.h', + 'fts.h', + 'getopt.h', + 'gshadow.h', + 'ieee754.h', + 
'ifaddrs.h', + 'libintl.h', + 'mcheck.h', + 'mntent.h', + 'obstack.h', + 'paths.h', + 'printf.h', + 'pty.h', + 'resolv.h', + 'shadow.h', + 'sysexits.h', + 'ttyent.h', + # Additional linux glibc headers + 'dlfcn.h', + 'elf.h', + 'features.h', + 'gconv.h', + 'gnu-versions.h', + 'lastlog.h', + 'libio.h', + 'link.h', + 'malloc.h', + 'memory.h', + 'netash/ash.h', + 'netatalk/at.h', + 'netax25/ax25.h', + 'neteconet/ec.h', + 'netipx/ipx.h', + 'netiucv/iucv.h', + 'netpacket/packet.h', + 'netrom/netrom.h', + 'netrose/rose.h', + 'nfs/nfs.h', + 'nl_types.h', + 'nss.h', + 're_comp.h', + 'regexp.h', + 'sched.h', + 'sgtty.h', + 'stab.h', + 'stdc-predef.h', + 'stdio_ext.h', + 'syscall.h', + 'termio.h', + 'thread_db.h', + 'ucontext.h', + 'ustat.h', + 'utmp.h', + 'values.h', + 'wait.h', + 'xlocale.h', + # Hardware specific headers + 'arm_neon.h', + 'emmintrin.h', + 'xmmintin.h', + ]) + +# Folders of C libraries so commonly used in C++, +# that they have parity with standard C libraries. +C_STANDARD_HEADER_FOLDERS = frozenset([ + # standard C library + "sys", + # glibc for linux + "arpa", + "asm-generic", + "bits", + "gnu", + "net", + "netinet", + "protocols", + "rpc", + "rpcsvc", + "scsi", + # linux kernel header + "drm", + "linux", + "misc", + "mtd", + "rdma", + "sound", + "video", + "xen", + ]) + +# Type names +_TYPES = re.compile( + r'^(?:' + # [dcl.type.simple] + r'(char(16_t|32_t)?)|wchar_t|' + r'bool|short|int|long|signed|unsigned|float|double|' + # [support.types] + r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|' + # [cstdint.syn] + r'(u?int(_fast|_least)?(8|16|32|64)_t)|' + r'(u?int(max|ptr)_t)|' + r')$') + +# These headers are excluded from [build/include] and [build/include_order] +# checks: +# - Anything not following google file name conventions (containing an +# uppercase character, such as Python.h or nsStringAPI.h, for example). +# - Lua headers. +_THIRD_PARTY_HEADERS_PATTERN = re.compile( + r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') + +# Pattern for matching FileInfo.BaseName() against test file name +_test_suffixes = ['_test', '_regtest', '_unittest'] +_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$' + +# Pattern that matches only complete whitespace, possibly across multiple lines. +_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) + +# Assertion macros. These are defined in base/logging.h and +# testing/base/public/gunit.h. +_CHECK_MACROS = [ + 'DCHECK', 'CHECK', + 'EXPECT_TRUE', 'ASSERT_TRUE', + 'EXPECT_FALSE', 'ASSERT_FALSE', + ] + +# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE +_CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS]) + +for op, replacement in [('==', 'EQ'), ('!=', 'NE'), + ('>=', 'GE'), ('>', 'GT'), + ('<=', 'LE'), ('<', 'LT')]: + _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement + _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement + _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement + _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement + +for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), + ('>=', 'LT'), ('>', 'LE'), + ('<=', 'GT'), ('<', 'GE')]: + _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement + _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement + +# Alternative tokens and their replacements. For full list, see section 2.5 +# Alternative tokens [lex.digraph] in the C++ standard. +# +# Digraphs (such as '%:') are not included here since it's a mess to +# match those on a word boundary. 
+_ALT_TOKEN_REPLACEMENT = { + 'and': '&&', + 'bitor': '|', + 'or': '||', + 'xor': '^', + 'compl': '~', + 'bitand': '&', + 'and_eq': '&=', + 'or_eq': '|=', + 'xor_eq': '^=', + 'not': '!', + 'not_eq': '!=' + } + +# Compile regular expression that matches all the above keywords. The "[ =()]" +# bit is meant to avoid matching these keywords outside of boolean expressions. +# +# False positives include C-style multi-line comments and multi-line strings +# but those have always been troublesome for cpplint. +_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( + r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') + +# These constants define types of headers for use with +# _IncludeState.CheckNextIncludeOrder(). +_C_SYS_HEADER = 1 +_CPP_SYS_HEADER = 2 +_OTHER_SYS_HEADER = 3 +_LIKELY_MY_HEADER = 4 +_POSSIBLE_MY_HEADER = 5 +_OTHER_HEADER = 6 + +# These constants define the current inline assembly state +_NO_ASM = 0 # Outside of inline assembly block +_INSIDE_ASM = 1 # Inside inline assembly block +_END_ASM = 2 # Last line of inline assembly block +_BLOCK_ASM = 3 # The whole block is an inline assembly block + +# Match start of assembly blocks +_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' + r'(?:\s+(volatile|__volatile__))?' + r'\s*[{(]') + +# Match strings that indicate we're working on a C (not C++) file. +_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' + r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') + +# Match string that indicates we're working on a Linux Kernel file. +_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') + +# Commands for sed to fix the problem +_SED_FIXUPS = { + 'Remove spaces around =': r's/ = /=/', + 'Remove spaces around !=': r's/ != /!=/', + 'Remove space before ( in if (': r's/if (/if(/', + 'Remove space before ( in for (': r's/for (/for(/', + 'Remove space before ( in while (': r's/while (/while(/', + 'Remove space before ( in switch (': r's/switch (/switch(/', + 'Should have a space between // and comment': r's/\/\//\/\/ /', + 'Missing space before {': r's/\([^ ]\){/\1 {/', + 'Tab found, replace by spaces': r's/\t/ /g', + 'Line ends in whitespace. Consider deleting these extra spaces.': r's/\s*$//', + 'You don\'t need a ; after a }': r's/};/}/', + 'Missing space after ,': r's/,\([^ ]\)/, \1/g', +} + +_regexp_compile_cache = {} + +# {str, set(int)}: a map from error categories to sets of linenumbers +# on which those errors are expected and should be suppressed. +_error_suppressions = {} + +# The root directory used for deriving header guard CPP variable. +# This is set by --root flag. +_root = None +_root_debug = False + +# The top level repository directory. If set, _root is calculated relative to +# this directory instead of the directory containing version control artifacts. +# This is set by the --repository flag. +_repository = None + +# Files to exclude from linting. This is set by the --exclude flag. +_excludes = None + +# Whether to supress PrintInfo messages +_quiet = False + +# The allowed line length of files. +# This is set by --linelength flag. 
+_line_length = 80 + +# This allows to use different include order rule than default +_include_order = "default" + +try: + unicode +except NameError: + # -- pylint: disable=redefined-builtin + basestring = unicode = str + +try: + long +except NameError: + # -- pylint: disable=redefined-builtin + long = int + +if sys.version_info < (3,): + # -- pylint: disable=no-member + # BINARY_TYPE = str + itervalues = dict.itervalues + iteritems = dict.iteritems +else: + # BINARY_TYPE = bytes + itervalues = dict.values + iteritems = dict.items + +def unicode_escape_decode(x): + if sys.version_info < (3,): + return codecs.unicode_escape_decode(x)[0] + else: + return x + +# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc. +# This is set by --headers flag. +_hpp_headers = set([]) + +# {str, bool}: a map from error categories to booleans which indicate if the +# category should be suppressed for every line. +_global_error_suppressions = {} + +def ProcessHppHeadersOption(val): + global _hpp_headers + try: + _hpp_headers = {ext.strip() for ext in val.split(',')} + except ValueError: + PrintUsage('Header extensions must be comma separated list.') + +def ProcessIncludeOrderOption(val): + if val is None or val == "default": + pass + elif val == "standardcfirst": + global _include_order + _include_order = val + else: + PrintUsage('Invalid includeorder value %s. Expected default|standardcfirst') + +def IsHeaderExtension(file_extension): + return file_extension in GetHeaderExtensions() + +def GetHeaderExtensions(): + if _hpp_headers: + return _hpp_headers + if _valid_extensions: + return {h for h in _valid_extensions if 'h' in h} + return set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh']) + +# The allowed extensions for file names +# This is set by --extensions flag +def GetAllExtensions(): + return GetHeaderExtensions().union(_valid_extensions or set( + ['c', 'cc', 'cpp', 'cxx', 'c++', 'cu'])) + +def ProcessExtensionsOption(val): + global _valid_extensions + try: + extensions = [ext.strip() for ext in val.split(',')] + _valid_extensions = set(extensions) + except ValueError: + PrintUsage('Extensions should be a comma-separated list of values;' + 'for example: extensions=hpp,cpp\n' + 'This could not be parsed: "%s"' % (val,)) + +def GetNonHeaderExtensions(): + return GetAllExtensions().difference(GetHeaderExtensions()) + +def ParseNolintSuppressions(filename, raw_line, linenum, error): + """Updates the global list of line error-suppressions. + + Parses any NOLINT comments on the current line, updating the global + error_suppressions store. Reports an error if the NOLINT comment + was malformed. + + Args: + filename: str, the name of the input file. + raw_line: str, the line of input text, with comments. + linenum: int, the number of the current line. + error: function, an error handler. 
+ """ + matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line) + if matched: + if matched.group(1): + suppressed_line = linenum + 1 + else: + suppressed_line = linenum + category = matched.group(2) + if category in (None, '(*)'): # => "suppress all" + _error_suppressions.setdefault(None, set()).add(suppressed_line) + else: + if category.startswith('(') and category.endswith(')'): + category = category[1:-1] + if category in _ERROR_CATEGORIES: + _error_suppressions.setdefault(category, set()).add(suppressed_line) + elif category not in _LEGACY_ERROR_CATEGORIES: + error(filename, linenum, 'readability/nolint', 5, + 'Unknown NOLINT error category: %s' % category) + +def ProcessGlobalSuppresions(lines): + """Updates the list of global error suppressions. + + Parses any lint directives in the file that have global effect. + + Args: + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + """ + for line in lines: + if _SEARCH_C_FILE.search(line): + for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + +def ResetNolintSuppressions(): + """Resets the set of NOLINT suppressions to empty.""" + _error_suppressions.clear() + _global_error_suppressions.clear() + +def IsErrorSuppressedByNolint(category, linenum): + """Returns true if the specified error category is suppressed on this line. + + Consults the global error_suppressions map populated by + ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. + + Args: + category: str, the category of the error. + linenum: int, the current line number. + Returns: + bool, True iff the error should be suppressed due to a NOLINT comment or + global suppression. + """ + return (_global_error_suppressions.get(category, False) or + linenum in _error_suppressions.get(category, set()) or + linenum in _error_suppressions.get(None, set())) + +def Match(pattern, s): + """Matches the string with the pattern, caching the compiled regexp.""" + # The regexp compilation caching is inlined in both Match and Search for + # performance reasons; factoring it out into a separate function turns out + # to be noticeably expensive. + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].match(s) + +def ReplaceAll(pattern, rep, s): + """Replaces instances of pattern in a string with a replacement. + + The compiled regex is kept in a cache shared by Match and Search. + + Args: + pattern: regex pattern + rep: replacement text + s: search string + + Returns: + string with replacements made (or original string if no replacements) + """ + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].sub(rep, s) + +def Search(pattern, s): + """Searches the string for the pattern, caching the compiled regexp.""" + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].search(s) + +def _IsSourceExtension(s): + """File extension (excluding dot) matches a source file extension.""" + return s in GetNonHeaderExtensions() + +class _IncludeState(object): + """Tracks line numbers for includes, and the order in which includes appear. 
+ + include_list contains list of lists of (header, line number) pairs. + It's a lists of lists rather than just one flat list to make it + easier to update across preprocessor boundaries. + + Call CheckNextIncludeOrder() once for each header in the file, passing + in the type constants defined above. Calls in an illegal order will + raise an _IncludeError with an appropriate error message. + + """ + # self._section will move monotonically through this set. If it ever + # needs to move backwards, CheckNextIncludeOrder will raise an error. + _INITIAL_SECTION = 0 + _MY_H_SECTION = 1 + _C_SECTION = 2 + _CPP_SECTION = 3 + _OTHER_SYS_SECTION = 4 + _OTHER_H_SECTION = 5 + + _TYPE_NAMES = { + _C_SYS_HEADER: 'C system header', + _CPP_SYS_HEADER: 'C++ system header', + _OTHER_SYS_HEADER: 'other system header', + _LIKELY_MY_HEADER: 'header this file implements', + _POSSIBLE_MY_HEADER: 'header this file may implement', + _OTHER_HEADER: 'other header', + } + _SECTION_NAMES = { + _INITIAL_SECTION: "... nothing. (This can't be an error.)", + _MY_H_SECTION: 'a header this file implements', + _C_SECTION: 'C system header', + _CPP_SECTION: 'C++ system header', + _OTHER_SYS_SECTION: 'other system header', + _OTHER_H_SECTION: 'other header', + } + + def __init__(self): + self.include_list = [[]] + self._section = None + self._last_header = None + self.ResetSection('') + + def FindHeader(self, header): + """Check if a header has already been included. + + Args: + header: header to check. + Returns: + Line number of previous occurrence, or -1 if the header has not + been seen before. + """ + for section_list in self.include_list: + for f in section_list: + if f[0] == header: + return f[1] + return -1 + + def ResetSection(self, directive): + """Reset section checking for preprocessor directive. + + Args: + directive: preprocessor directive (e.g. "if", "else"). + """ + # The name of the current section. + self._section = self._INITIAL_SECTION + # The path of last found header. + self._last_header = '' + + # Update list of includes. Note that we never pop from the + # include list. + if directive in ('if', 'ifdef', 'ifndef'): + self.include_list.append([]) + elif directive in ('else', 'elif'): + self.include_list[-1] = [] + + def SetLastHeader(self, header_path): + self._last_header = header_path + + def CanonicalizeAlphabeticalOrder(self, header_path): + """Returns a path canonicalized for alphabetical comparison. + + - replaces "-" with "_" so they both cmp the same. + - removes '-inl' since we don't require them to be after the main header. + - lowercase everything, just in case. + + Args: + header_path: Path to be canonicalized. + + Returns: + Canonicalized path. + """ + return header_path.replace('-inl.h', '.h').replace('-', '_').lower() + + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. + + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. 
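+    # Illustrative note (added annotation): after CanonicalizeAlphabeticalOrder,
+    # a header named "Foo-Bar-inl.h" compares as "foo_bar.h", and an
+    # out-of-order header is only reported when the immediately preceding line
+    # is itself an #include; a blank separator line effectively resets the
+    # ordering check.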
+ if (self._last_header > header_path and + Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): + return False + return True + + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. + + This function also updates the internal state to be ready to check + the next include. + + Args: + header_type: One of the _XXX_HEADER constants defined above. + + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. + + """ + error_message = ('Found %s after %s' % + (self._TYPE_NAMES[header_type], + self._SECTION_NAMES[self._section])) + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _OTHER_SYS_HEADER: + if self._section <= self._OTHER_SYS_SECTION: + self._section = self._OTHER_SYS_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION + + if last_section != self._section: + self._last_header = '' + + return '' + +class _CppLintState(object): + """Maintains module-wide state..""" + + def __init__(self): + self.verbose_level = 1 # global setting. + self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = 'total' # In what way are we counting errors? + self.errors_by_category = {} # string to int dict storing error counts + self.quiet = False # Suppress non-error messagess? + + # output format: + # "emacs" - format that emacs can parse (default) + # "eclipse" - format that eclipse can parse + # "vs7" - format that Microsoft Visual Studio 7 can parse + # "junit" - format that Jenkins, Bamboo, etc can parse + # "sed" - returns a gnu sed command to fix the problem + # "gsed" - like sed, but names the command gsed, e.g. 
for macOS homebrew users + self.output_format = 'emacs' + + # For JUnit output, save errors and failures until the end so that they + # can be written into the XML + self._junit_errors = [] + self._junit_failures = [] + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetQuiet(self, quiet): + """Sets the module's quiet settings, and returns the previous setting.""" + last_quiet = self.quiet + self.quiet = quiet + return last_quiet + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level + + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + + def SetFilters(self, filters): + """Sets the error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). + Each filter should start with + or -; else we die. + + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """ Adds more filters to the existing list of error-message filters. """ + for filt in filters.split(','): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith('+') or filt.startswith('-')): + raise ValueError('Every filter in --filters must start with + or -' + ' (%s does not)' % filt) + + def BackupFilters(self): + """ Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """ Restores filters previously backed up.""" + self.filters = self._filters_backup[:] + + def ResetErrorCounts(self): + """Sets the module's error statistic back to zero.""" + self.error_count = 0 + self.errors_by_category = {} + + def IncrementErrorCount(self, category): + """Bumps the module's error statistic.""" + self.error_count += 1 + if self.counting in ('toplevel', 'detailed'): + if self.counting != 'detailed': + category = category.split('/')[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in sorted(iteritems(self.errors_by_category)): + self.PrintInfo('Category \'%s\' errors found: %d\n' % + (category, count)) + if self.error_count > 0: + self.PrintInfo('Total errors found: %d\n' % self.error_count) + + def PrintInfo(self, message): + if not _quiet and self.output_format != 'junit': + sys.stdout.write(message) + + def PrintError(self, message): + if self.output_format == 'junit': + self._junit_errors.append(message) + else: + sys.stderr.write(message) + + def AddJUnitFailure(self, filename, linenum, message, category, confidence): + self._junit_failures.append((filename, linenum, message, category, + confidence)) + + def FormatJUnitXML(self): + num_errors = len(self._junit_errors) + num_failures = len(self._junit_failures) + + testsuite = xml.etree.ElementTree.Element('testsuite') + testsuite.attrib['errors'] = 
str(num_errors) + testsuite.attrib['failures'] = str(num_failures) + testsuite.attrib['name'] = 'cpplint' + + if num_errors == 0 and num_failures == 0: + testsuite.attrib['tests'] = str(1) + xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed') + + else: + testsuite.attrib['tests'] = str(num_errors + num_failures) + if num_errors > 0: + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = 'errors' + error = xml.etree.ElementTree.SubElement(testcase, 'error') + error.text = '\n'.join(self._junit_errors) + if num_failures > 0: + # Group failures by file + failed_file_order = [] + failures_by_file = {} + for failure in self._junit_failures: + failed_file = failure[0] + if failed_file not in failed_file_order: + failed_file_order.append(failed_file) + failures_by_file[failed_file] = [] + failures_by_file[failed_file].append(failure) + # Create a testcase for each file + for failed_file in failed_file_order: + failures = failures_by_file[failed_file] + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = failed_file + failure = xml.etree.ElementTree.SubElement(testcase, 'failure') + template = '{0}: {1} [{2}] [{3}]' + texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures] + failure.text = '\n'.join(texts) + + xml_decl = '\n' + return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8') + +_cpplint_state = _CppLintState() + +def _OutputFormat(): + """Gets the module's output format.""" + return _cpplint_state.output_format + +def _SetOutputFormat(output_format): + """Sets the module's output format.""" + _cpplint_state.SetOutputFormat(output_format) + +def _Quiet(): + """Return's the module's quiet setting.""" + return _cpplint_state.quiet + +def _SetQuiet(quiet): + """Set the module's quiet status, and return previous setting.""" + return _cpplint_state.SetQuiet(quiet) + +def _VerboseLevel(): + """Returns the module's verbosity setting.""" + return _cpplint_state.verbose_level + +def _SetVerboseLevel(level): + """Sets the module's verbosity, and returns the previous setting.""" + return _cpplint_state.SetVerboseLevel(level) + +def _SetCountingStyle(level): + """Sets the module's counting options.""" + _cpplint_state.SetCountingStyle(level) + +def _Filters(): + """Returns the module's list of output filters, as a list.""" + return _cpplint_state.filters + +def _SetFilters(filters): + """Sets the module's error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.SetFilters(filters) + +def _AddFilters(filters): + """Adds more filter overrides. + + Unlike _SetFilters, this function does not reset the current list of filters + available. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.AddFilters(filters) + +def _BackupFilters(): + """ Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() + +def _RestoreFilters(): + """ Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() + +class _FunctionState(object): + """Tracks current function name and the number of lines in its body.""" + + _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. + _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. 
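+  # Illustrative note (added annotation): Check() below scales the limit with
+  # verbosity, trigger = base_trigger * 2**verbose_level, so at --v=0 a
+  # function may have up to 250 non-comment lines (400 if its name starts with
+  # "Test" or "TEST") before readability/fn_size fires, and each higher --v
+  # level doubles that limit.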
+ + def __init__(self): + self.in_a_function = False + self.lines_in_function = 0 + self.current_function = '' + + def Begin(self, function_name): + """Start analyzing function body. + + Args: + function_name: The name of the function being tracked. + """ + self.in_a_function = True + self.lines_in_function = 0 + self.current_function = function_name + + def Count(self): + """Count line in current function body.""" + if self.in_a_function: + self.lines_in_function += 1 + + def Check(self, error, filename, linenum): + """Report if too many lines in function body. + + Args: + error: The function to call with any errors found. + filename: The name of the current file. + linenum: The number of the line to check. + """ + if not self.in_a_function: + return + + if Match(r'T(EST|est)', self.current_function): + base_trigger = self._TEST_TRIGGER + else: + base_trigger = self._NORMAL_TRIGGER + trigger = base_trigger * 2**_VerboseLevel() + + if self.lines_in_function > trigger: + error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... + if error_level > 5: + error_level = 5 + error(filename, linenum, 'readability/fn_size', error_level, + 'Small and focused functions are preferred:' + ' %s has %d non-comment lines' + ' (error triggered by exceeding %d lines).' % ( + self.current_function, self.lines_in_function, trigger)) + + def End(self): + """Stop analyzing function body.""" + self.in_a_function = False + +class _IncludeError(Exception): + """Indicates a problem with the include order in a file.""" + pass + +class FileInfo(object): + """Provides utility functions for filenames. + + FileInfo provides easy access to the components of a file's path + relative to the project root. + """ + + def __init__(self, filename): + self._filename = filename + + def FullName(self): + """Make Windows paths like Unix.""" + return os.path.abspath(self._filename).replace('\\', '/') + + def RepositoryName(self): + r"""FullName after removing the local path to the repository. + + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\\Documents and Settings\\..." or "/home/username/..." in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. 
+ """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + # If the user specified a repository path, it exists, and the file is + # contained in it, use the specified repository path + if _repository: + repo = FileInfo(_repository).FullName() + root_dir = project_dir + while os.path.exists(root_dir): + # allow case insensitive compare on Windows + if os.path.normcase(root_dir) == os.path.normcase(repo): + return os.path.relpath(fullname, root_dir).replace('\\', '/') + one_up_dir = os.path.dirname(root_dir) + if one_up_dir == root_dir: + break + root_dir = one_up_dir + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. + root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if (os.path.exists(os.path.join(current_dir, ".git")) or + os.path.exists(os.path.join(current_dir, ".hg")) or + os.path.exists(os.path.join(current_dir, ".svn"))): + root_dir = current_dir + current_dir = os.path.dirname(current_dir) + + if (os.path.exists(os.path.join(root_dir, ".git")) or + os.path.exists(os.path.join(root_dir, ".hg")) or + os.path.exists(os.path.join(root_dir, ".svn"))): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Don't know what to do; header guard warnings may be wrong... + return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. + + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') + + Returns: + A tuple of (directory, basename, extension). + """ + + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) + + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] + + def Extension(self): + """File extension - text following the final period, includes that period.""" + return self.Split()[2] + + def NoExtension(self): + """File has no source file extension.""" + return '/'.join(self.Split()[0:2]) + + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) + +def _ShouldPrintError(category, confidence, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" + + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. 
+ if IsErrorSuppressedByNolint(category, linenum): + return False + + if confidence < _cpplint_state.verbose_level: + return False + + is_filtered = False + for one_filter in _Filters(): + if one_filter.startswith('-'): + if category.startswith(one_filter[1:]): + is_filtered = True + elif one_filter.startswith('+'): + if category.startswith(one_filter[1:]): + is_filtered = False + else: + assert False # should have been checked for in SetFilter. + if is_filtered: + return False + + return True + +def Error(filename, linenum, category, confidence, message): + """Logs the fact we've found a lint error. + + We log where the error was found, and also our confidence in the error, + that is, how certain we are this is a legitimate style regression, and + not a misidentification or a use that's sometimes justified. + + False positives can be suppressed by the use of + "cpplint(category)" comments on the offending line. These are + parsed into _error_suppressions. + + Args: + filename: The name of the file containing the error. + linenum: The number of the line containing the error. + category: A string used to describe the "category" this bug + falls under: "whitespace", say, or "runtime". Categories + may have a hierarchy separated by slashes: "whitespace/indent". + confidence: A number from 1-5 representing a confidence score for + the error, with 5 meaning that we are certain of the problem, + and 1 meaning that it could be a legitimate construct. + message: The error message. + """ + if _ShouldPrintError(category, confidence, linenum): + _cpplint_state.IncrementErrorCount(category) + if _cpplint_state.output_format == 'vs7': + _cpplint_state.PrintError('%s(%s): error cpplint: [%s] %s [%d]\n' % ( + filename, linenum, category, message, confidence)) + elif _cpplint_state.output_format == 'eclipse': + sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + elif _cpplint_state.output_format == 'junit': + _cpplint_state.AddJUnitFailure(filename, linenum, message, category, + confidence) + elif _cpplint_state.output_format in ['sed', 'gsed']: + if message in _SED_FIXUPS: + sys.stdout.write(_cpplint_state.output_format + " -i '%s%s' %s # %s [%s] [%d]\n" % ( + linenum, _SED_FIXUPS[message], filename, message, category, confidence)) + else: + sys.stderr.write('# %s:%s: "%s" [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + else: + final_message = '%s:%s: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence) + sys.stderr.write(final_message) + +# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. +_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( + r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') +# Match a single C style comment on the same line. +_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' +# Matches multi-line C style comments. +# This RE is a little bit more complicated than one might expect, because we +# have to take care of space removals tools so we can handle comments inside +# statements better. +# The current rule is: We only clear spaces from both sides when we're at the +# end of the line. Otherwise, we try to remove spaces from the right side, +# if this doesn't work we try on left side but only if there's a non-character +# on the right. 
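+# Illustrative example (added annotation): 'f(a, /* name */ b);' is cleansed
+# to 'f(a, b);', and a comment at the end of a line is stripped together with
+# the blanks surrounding it.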
+_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( + r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + + _RE_PATTERN_C_COMMENTS + r'\s+|' + + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + + _RE_PATTERN_C_COMMENTS + r')') + +def IsCppString(line): + """Does line terminate so, that the next symbol is in string constant. + + This function does not consider single-line nor multi-line comments. + + Args: + line: is a partial line of code starting from the 0..n. + + Returns: + True, if next character appended to 'line' is inside a + string constant. + """ + + line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" + return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 + +def CleanseRawStrings(raw_lines): + """Removes C++11 raw strings from lines. + + Before: + static const char kData[] = R"( + multi-line string + )"; + + After: + static const char kData[] = "" + (replaced by blank line) + ""; + + Args: + raw_lines: list of raw lines. + + Returns: + list of lines with C++11 raw strings replaced by empty strings. + """ + + delimiter = None + lines_without_raw_strings = [] + for line in raw_lines: + if delimiter: + # Inside a raw string, look for the end + end = line.find(delimiter) + if end >= 0: + # Found the end of the string, match leading space for this + # line and resume copying the original lines, and also insert + # a "" on the last line. + leading_space = Match(r'^(\s*)\S', line) + line = leading_space.group(1) + '""' + line[end + len(delimiter):] + delimiter = None + else: + # Haven't found the end yet, append a blank line. + line = '""' + + # Look for beginning of a raw string, and replace them with + # empty strings. This is done in a loop to handle multiple raw + # strings on the same line. + while delimiter is None: + # Look for beginning of a raw string. + # See 2.14.15 [lex.string] for syntax. + # + # Once we have matched a raw string, we check the prefix of the + # line to make sure that the line is not part of a single line + # comment. It's done this way because we remove raw strings + # before removing comments as opposed to removing comments + # before removing raw strings. This is because there are some + # cpplint checks that requires the comments to be preserved, but + # we don't want to check comments that are inside raw strings. + matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) + if (matched and + not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', + matched.group(1))): + delimiter = ')' + matched.group(2) + '"' + + end = matched.group(3).find(delimiter) + if end >= 0: + # Raw string ended on same line + line = (matched.group(1) + '""' + + matched.group(3)[end + len(delimiter):]) + delimiter = None + else: + # Start of a multi-line raw string + line = matched.group(1) + '""' + else: + break + + lines_without_raw_strings.append(line) + + # TODO(unknown): if delimiter is not None here, we might want to + # emit a warning for unterminated string. 
+ return lines_without_raw_strings + +def FindNextMultiLineCommentStart(lines, lineix): + """Find the beginning marker for a multiline comment.""" + while lineix < len(lines): + if lines[lineix].strip().startswith('/*'): + # Only return this marker if the comment goes beyond this line + if lines[lineix].strip().find('*/', 2) < 0: + return lineix + lineix += 1 + return len(lines) + +def FindNextMultiLineCommentEnd(lines, lineix): + """We are inside a comment, find the end marker.""" + while lineix < len(lines): + if lines[lineix].strip().endswith('*/'): + return lineix + lineix += 1 + return len(lines) + +def RemoveMultiLineCommentsFromRange(lines, begin, end): + """Clears a range of lines for multi-line comments.""" + # Having // dummy comments makes the lines non-empty, so we will not get + # unnecessary blank line warnings later in the code. + for i in range(begin, end): + lines[i] = '/**/' + +def RemoveMultiLineComments(filename, lines, error): + """Removes multiline (c-style) comments from lines.""" + lineix = 0 + while lineix < len(lines): + lineix_begin = FindNextMultiLineCommentStart(lines, lineix) + if lineix_begin >= len(lines): + return + lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) + if lineix_end >= len(lines): + error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, + 'Could not find end of multi-line comment') + return + RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) + lineix = lineix_end + 1 + +def CleanseComments(line): + """Removes //-comments and single-line C-style /* */ comments. + + Args: + line: A line of C++ source. + + Returns: + The line with single-line comments removed. + """ + commentpos = line.find('//') + if commentpos != -1 and not IsCppString(line[:commentpos]): + line = line[:commentpos].rstrip() + # get rid of /* ... */ + return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) + +class CleansedLines(object): + """Holds 4 copies of all lines with different preprocessing applied to them. + + 1) elided member contains lines without strings and comments. + 2) lines member contains lines without comments. + 3) raw_lines member contains all the lines without processing. + 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw + strings removed. + All these members are of , and of the same length. + """ + + def __init__(self, lines): + self.elided = [] + self.lines = [] + self.raw_lines = lines + self.num_lines = len(lines) + self.lines_without_raw_strings = CleanseRawStrings(lines) + for linenum in range(len(self.lines_without_raw_strings)): + self.lines.append(CleanseComments( + self.lines_without_raw_strings[linenum])) + elided = self._CollapseStrings(self.lines_without_raw_strings[linenum]) + self.elided.append(CleanseComments(elided)) + + def NumLines(self): + """Returns the number of lines represented.""" + return self.num_lines + + @staticmethod + def _CollapseStrings(elided): + """Collapses strings and chars on a line to simple "" or '' blocks. + + We nix strings first so we're not fooled by text like '"http://"' + + Args: + elided: The line being processed. + + Returns: + The line with collapsed strings. + """ + if _RE_PATTERN_INCLUDE.match(elided): + return elided + + # Remove escaped characters first to make quote/single quote collapsing + # basic. Things that look like escaped characters shouldn't occur + # outside of strings and chars. + elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) + + # Replace quoted strings and digit separators. 
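+    # (Added annotation: e.g. the string "hello" collapses to "", the char 'x'
+    # collapses to '', and a C++14 literal such as 1'000'000 collapses to
+    # 1000000.)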
Both single quotes + # and double quotes are processed in the same loop, otherwise + # nested quotes wouldn't work. + collapsed = '' + while True: + # Find the first quote character + match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) + if not match: + collapsed += elided + break + head, quote, tail = match.groups() + + if quote == '"': + # Collapse double quoted strings + second_quote = tail.find('"') + if second_quote >= 0: + collapsed += head + '""' + elided = tail[second_quote + 1:] + else: + # Unmatched double quote, don't bother processing the rest + # of the line since this is probably a multiline string. + collapsed += elided + break + else: + # Found single quote, check nearby text to eliminate digit separators. + # + # There is no special handling for floating point here, because + # the integer/fractional/exponent parts would all be parsed + # correctly as long as there are digits on both sides of the + # separator. So we are fine as long as we don't see something + # like "0.'3" (gcc 4.9.0 will not allow this literal). + if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): + match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) + collapsed += head + match_literal.group(1).replace("'", '') + elided = match_literal.group(2) + else: + second_quote = tail.find('\'') + if second_quote >= 0: + collapsed += head + "''" + elided = tail[second_quote + 1:] + else: + # Unmatched single quote + collapsed += elided + break + + return collapsed + +def FindEndOfExpressionInLine(line, startpos, stack): + """Find the position just after the end of current parenthesized expression. + + Args: + line: a CleansedLines line. + startpos: start searching at this position. + stack: nesting stack at startpos. + + Returns: + On finding matching end: (index just after matching end, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at end of this line) + """ + for i in xrange(startpos, len(line)): + char = line[i] + if char in '([{': + # Found start of parenthesized expression, push to expression stack + stack.append(char) + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + if stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + elif i > 0 and Search(r'\boperator\s*$', line[0:i]): + # operator<, don't add to stack + continue + else: + # Tentative start of template argument list + stack.append('<') + elif char in ')]}': + # Found end of parenthesized expression. + # + # If we are currently expecting a matching '>', the pending '<' + # must have been an operator. Remove them from expression stack. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + if ((stack[-1] == '(' and char == ')') or + (stack[-1] == '[' and char == ']') or + (stack[-1] == '{' and char == '}')): + stack.pop() + if not stack: + return (i + 1, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == '>': + # Found potential end of template argument list. + + # Ignore "->" and operator functions + if (i > 0 and + (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))): + continue + + # Pop the stack if there is a matching '<'. Otherwise, ignore + # this '>' since it must be an operator. + if stack: + if stack[-1] == '<': + stack.pop() + if not stack: + return (i + 1, None) + elif char == ';': + # Found something that look like end of statements. 
If we are currently + # expecting a '>', the matching '<' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + + # Did not find end of expression or unbalanced parentheses on this line + return (-1, stack) + +def CloseExpression(clean_lines, linenum, pos): + """If input points to ( or { or [ or <, finds the position that closes it. + + If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the + linenum/pos that correspond to the closing of the expression. + + TODO(unknown): cpplint spends a fair bit of time matching parentheses. + Ideally we would want to index all opening and closing parentheses once + and have CloseExpression be just a simple lookup, but due to preprocessor + tricks, this is not so easy. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *past* the closing brace, or + (line, len(lines), -1) if we never find a close. Note we ignore + strings and comments when matching; and the line we return is the + 'cleansed' line at linenum. + """ + + line = clean_lines.elided[linenum] + if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]): + return (line, clean_lines.NumLines(), -1) + + # Check first line + (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) + if end_pos > -1: + return (line, linenum, end_pos) + + # Continue scanning forward + while stack and linenum < clean_lines.NumLines() - 1: + linenum += 1 + line = clean_lines.elided[linenum] + (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) + if end_pos > -1: + return (line, linenum, end_pos) + + # Did not find end of expression before end of file, give up + return (line, clean_lines.NumLines(), -1) + +def FindStartOfExpressionInLine(line, endpos, stack): + """Find position at the matching start of current expression. + + This is almost the reverse of FindEndOfExpressionInLine, but note + that the input position and returned position differs by 1. + + Args: + line: a CleansedLines line. + endpos: start searching at this position. + stack: nesting stack at endpos. + + Returns: + On finding matching start: (index at matching start, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at beginning of this line) + """ + i = endpos + while i >= 0: + char = line[i] + if char in ')]}': + # Found end of expression, push to expression stack + stack.append(char) + elif char == '>': + # Found potential end of template argument list. + # + # Ignore it if it's a "->" or ">=" or "operator>" + if (i > 0 and + (line[i - 1] == '-' or + Match(r'\s>=\s', line[i - 1:]) or + Search(r'\boperator\s*$', line[0:i]))): + i -= 1 + else: + stack.append('>') + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + i -= 1 + else: + # If there is a matching '>', we can pop the expression stack. + # Otherwise, ignore this '<' since it must be an operator. + if stack and stack[-1] == '>': + stack.pop() + if not stack: + return (i, None) + elif char in '([{': + # Found start of expression. + # + # If there are any unmatched '>' on the stack, they must be + # operators. Remove those. 
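+      # (Added annotation: e.g. when scanning 'foo(a > b)' backwards from the
+      # closing parenthesis, the '>' pushed along the way is discarded here as
+      # a comparison operator.)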
+ while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + if ((char == '(' and stack[-1] == ')') or + (char == '[' and stack[-1] == ']') or + (char == '{' and stack[-1] == '}')): + stack.pop() + if not stack: + return (i, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '<', the matching '>' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + + i -= 1 + + return (-1, stack) + +def ReverseCloseExpression(clean_lines, linenum, pos): + """If input points to ) or } or ] or >, finds the position that opens it. + + If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the + linenum/pos that correspond to the opening of the expression. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *at* the opening brace, or + (line, 0, -1) if we never find the matching opening brace. Note + we ignore strings and comments when matching; and the line we + return is the 'cleansed' line at linenum. + """ + line = clean_lines.elided[linenum] + if line[pos] not in ')}]>': + return (line, 0, -1) + + # Check last line + (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) + if start_pos > -1: + return (line, linenum, start_pos) + + # Continue scanning backward + while stack and linenum > 0: + linenum -= 1 + line = clean_lines.elided[linenum] + (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) + if start_pos > -1: + return (line, linenum, start_pos) + + # Did not find start of expression before beginning of file, give up + return (line, 0, -1) + +def CheckForCopyright(filename, lines, error): + """Logs an error if no Copyright message appears at the top of the file.""" + + # We'll say it should occur by line 10. Don't forget there's a + # dummy line at the front. + for line in xrange(1, min(len(lines), 11)): + if re.search(r'Copyright', lines[line], re.I): break + else: # means no copyright line was found + error(filename, 0, 'legal/copyright', 5, + 'No copyright message found. ' + 'You should have a line: "Copyright [year] "') + +def GetIndentLevel(line): + """Return the number of leading spaces in line. + + Args: + line: A string to check. + + Returns: + An integer count of leading spaces, possibly zero. + """ + indent = Match(r'^( *)\S', line) + if indent: + return len(indent.group(1)) + else: + return 0 + +def PathSplitToList(path): + """Returns the path split into a list by the separator. + + Args: + path: An absolute or relative path (e.g. '/a/b/c/' or '../a') + + Returns: + A list of path components (e.g. ['a', 'b', 'c]). + """ + lst = [] + while True: + (head, tail) = os.path.split(path) + if head == path: # absolute paths end + lst.append(head) + break + if tail == path: # relative paths end + lst.append(tail) + break + + path = head + lst.append(tail) + + lst.reverse() + return lst + +def GetHeaderGuardCPPVariable(filename): + """Returns the CPP variable that should be used as a header guard. + + Args: + filename: The name of a C++ header file. + + Returns: + The CPP variable that should be used as a header guard in the + named file. + + """ + + # Restores original filename in case that cpplint is invoked from Emacs's + # flymake. 
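+  # Illustrative example (added annotation): with the default --root, a header
+  # at chrome/browser/ui/browser.h relative to the repository root yields the
+  # guard variable CHROME_BROWSER_UI_BROWSER_H_.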
+ filename = re.sub(r'_flymake\.h$', '.h', filename) + filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) + # Replace 'c++' with 'cpp'. + filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') + + fileinfo = FileInfo(filename) + file_path_from_root = fileinfo.RepositoryName() + + def FixupPathFromRoot(): + if _root_debug: + sys.stderr.write("\n_root fixup, _root = '%s', repository name = '%s'\n" + % (_root, fileinfo.RepositoryName())) + + # Process the file path with the --root flag if it was set. + if not _root: + if _root_debug: + sys.stderr.write("_root unspecified\n") + return file_path_from_root + + def StripListPrefix(lst, prefix): + # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) + if lst[:len(prefix)] != prefix: + return None + # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] + return lst[(len(prefix)):] + + # root behavior: + # --root=subdir , lstrips subdir from the header guard + maybe_path = StripListPrefix(PathSplitToList(file_path_from_root), + PathSplitToList(_root)) + + if _root_debug: + sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," + + " _root=%s)\n") % (maybe_path, file_path_from_root, _root)) + + if maybe_path: + return os.path.join(*maybe_path) + + # --root=.. , will prepend the outer directory to the header guard + full_path = fileinfo.FullName() + root_abspath = os.path.abspath(_root) + + maybe_path = StripListPrefix(PathSplitToList(full_path), + PathSplitToList(root_abspath)) + + if _root_debug: + sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " + + "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath)) + + if maybe_path: + return os.path.join(*maybe_path) + + if _root_debug: + sys.stderr.write("_root ignore, returning %s\n" % (file_path_from_root)) + + # --root=FAKE_DIR is ignored + return file_path_from_root + + file_path_from_root = FixupPathFromRoot() + return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' + +def CheckForHeaderGuard(filename, clean_lines, error): + """Checks that the file contains a header guard. + + Logs an error if no #ifndef header guard is present. For other + headers, checks that the full pathname is used. + + Args: + filename: The name of the C++ header file. + clean_lines: A CleansedLines instance containing the file. + error: The function to call with any errors found. + """ + + # Don't check for header guards if there are error suppression + # comments somewhere in this file. + # + # Because this is silencing a warning for a nonexistent line, we + # only support the very specific NOLINT(build/header_guard) syntax, + # and not the general NOLINT or NOLINT(*) syntax. + raw_lines = clean_lines.lines_without_raw_strings + for i in raw_lines: + if Search(r'//\s*NOLINT\(build/header_guard\)', i): + return + + # Allow pragma once instead of header guards + for i in raw_lines: + if Search(r'^\s*#pragma\s+once', i): + return + + cppvar = GetHeaderGuardCPPVariable(filename) + + ifndef = '' + ifndef_linenum = 0 + define = '' + endif = '' + endif_linenum = 0 + for linenum, line in enumerate(raw_lines): + linesplit = line.split() + if len(linesplit) >= 2: + # find the first occurrence of #ifndef and #define, save arg + if not ifndef and linesplit[0] == '#ifndef': + # set ifndef to the header guard presented on the #ifndef line. 
+ ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == '#define': + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith('#endif'): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + error(filename, 0, 'build/header_guard', 5, + 'No #ifndef header guard found, suggested CPP variable is: %s' % + cppvar) + return + + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + '_': + error_level = 5 + + ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, + error) + error(filename, ifndef_linenum, 'build/header_guard', error_level, + '#ifndef header guard has wrong style, please use: %s' % cppvar) + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, + error) + match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) + if match: + if match.group(1) == '_': + # Issue low severity warning for deprecated double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif // %s"' % cppvar) + return + + # Didn't find the corresponding "//" comment. If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. + no_single_line_comments = True + for i in xrange(1, len(raw_lines) - 1): + line = raw_lines[i] + if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break + + if no_single_line_comments: + match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) + if match: + if match.group(1) == '_': + # Low severity warning for double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif /* %s */"' % cppvar) + return + + # Didn't find anything + error(filename, endif_linenum, 'build/header_guard', 5, + '#endif line should be "#endif // %s"' % cppvar) + +def CheckHeaderFileIncluded(filename, include_state, error): + """Logs an error if a source file does not include its header.""" + + # Do not check test files + fileinfo = FileInfo(filename) + if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return + + for ext in GetHeaderExtensions(): + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + '.' + ext + if not os.path.exists(headerfile): + continue + headername = FileInfo(headerfile).RepositoryName() + first_include = None + include_uses_unix_dir_aliases = False + for section_list in include_state.include_list: + for f in section_list: + include_text = f[0] + if "./" in include_text: + include_uses_unix_dir_aliases = True + if headername in include_text or include_text in headername: + return + if not first_include: + first_include = f[1] + + message = '%s should include its header file %s' % (fileinfo.RepositoryName(), headername) + if include_uses_unix_dir_aliases: + message += ". Relative paths like . and .. are not allowed." + + error(filename, first_include, 'build/include', 5, message) + +def CheckForBadCharacters(filename, lines, error): + """Logs an error for each line containing bad characters. + + Two kinds of bad characters: + + 1. 
Unicode replacement characters: These indicate that either the file + contained invalid UTF-8 (likely) or Unicode replacement characters (which + it shouldn't). Note that it's possible for this to throw off line + numbering if the invalid UTF-8 occurred adjacent to a newline. + + 2. NUL bytes. These are problematic for some tools. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + for linenum, line in enumerate(lines): + if unicode_escape_decode('\ufffd') in line: + error(filename, linenum, 'readability/utf8', 5, + 'Line contains invalid UTF-8 (or Unicode replacement character).') + if '\0' in line: + error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') + +def CheckForNewlineAtEOF(filename, lines, error): + """Logs an error if there is no newline char at the end of the file. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, + 'Could not find a newline character at the end of the file.') + +def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): + """Logs an error if we see /* ... */ or "..." that extend past one line. + + /* ... */ comments are legit inside macros, for one line. + Otherwise, we prefer // comments, so it's ok to warn about the + other. Likewise, it's ok for strings to extend across multiple + lines, as long as a line continuation character (backslash) + terminates each line. Although not currently prohibited by the C++ + style guide, it's ugly and unnecessary. We don't do well with either + in this lint program, so we warn about both. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remove all \\ (escaped backslashes) from the line. They are OK, and the + # second (escaped) slash may trigger later \" detection erroneously. + line = line.replace('\\\\', '') + + if line.count('/*') > line.count('*/'): + error(filename, linenum, 'readability/multiline_comment', 5, + 'Complex multi-line /*...*/-style comment found. ' + 'Lint may give bogus warnings. ' + 'Consider replacing these with //-style comments, ' + 'with #if 0...#endif, ' + 'or with more clearly structured multi-line comments.') + + if (line.count('"') - line.count('\\"')) % 2: + error(filename, linenum, 'readability/multiline_string', 5, + 'Multi-line string ("...") found. This lint script doesn\'t ' + 'do well with such strings, and may give bogus warnings. ' + 'Use C++11 raw strings or concatenation instead.') + +# (non-threadsafe name, thread-safe alternative, validation pattern) +# +# The validation pattern is used to eliminate false positives such as: +# _rand(); // false positive due to substring match. +# ->rand(); // some member function rand(). +# ACMRandom rand(seed); // some variable named rand. +# ISAACRandom rand(); // another variable named rand. 
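+# (Added annotation: by contrast, uses such as 'x = rand();' or 'f(rand());'
+# do match, per the requirement described next.)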
+# +# Basically we require the return value of these functions to be used +# in some expression context on the same line by matching on some +# operator before the function name. This eliminates constructors and +# member function calls. +_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' +_THREADING_LIST = ( + ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), + ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), + ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), + ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), + ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), + ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), + ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), + ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), + ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), + ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), + ('strtok(', 'strtok_r(', + _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), + ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), + ) + +def CheckPosixThreading(filename, clean_lines, linenum, error): + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if Search(pattern, line): + error(filename, linenum, 'runtime/threadsafe_fn', 2, + 'Consider using ' + multithread_safe_func + + '...) instead of ' + single_thread_func + + '...) for improved thread safety.') + +def CheckVlogArguments(filename, clean_lines, linenum, error): + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): + error(filename, linenum, 'runtime/vlog', 5, + 'VLOG() should be used with numeric verbosity level. ' + 'Use LOG() if you want symbolic severity levels.') + +# Matches invalid increment: *count++, which moves pointer instead of +# incrementing a value. +_RE_PATTERN_INVALID_INCREMENT = re.compile( + r'^\s*\*\w+(\+\+|--);') + +def CheckInvalidIncrement(filename, clean_lines, linenum, error): + """Checks for invalid increment *count++. + + For example following function: + void increment_counter(int* count) { + *count++; + } + is invalid, because it effectively does count++, moving pointer, and should + be replaced with ++*count, (*count)++ or *count += 1. + + Args: + filename: The name of the current file. 
+ clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if _RE_PATTERN_INVALID_INCREMENT.match(line): + error(filename, linenum, 'runtime/invalid_increment', 5, + 'Changing pointer instead of value (or unused value of operator*).') + +def IsMacroDefinition(clean_lines, linenum): + if Search(r'^#define', clean_lines[linenum]): + return True + + if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]): + return True + + return False + +def IsForwardClassDeclaration(clean_lines, linenum): + return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) + +class _BlockInfo(object): + """Stores information about a generic block of code.""" + + def __init__(self, linenum, seen_open_brace): + self.starting_linenum = linenum + self.seen_open_brace = seen_open_brace + self.open_parentheses = 0 + self.inline_asm = _NO_ASM + self.check_namespace_indentation = False + + def CheckBegin(self, filename, clean_lines, linenum, error): + """Run checks that applies to text up to the opening brace. + + This is mostly for checking the text after the class identifier + and the "{", usually where the base class is specified. For other + blocks, there isn't much to check, so we always pass. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Run checks that applies to text after the closing brace. + + This is mostly used for checking end of namespace comments. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def IsBlockInfo(self): + """Returns true if this block is a _BlockInfo. + + This is convenient for verifying that an object is an instance of + a _BlockInfo, but not an instance of any of the derived classes. + + Returns: + True for this class, False for derived classes. + """ + return self.__class__ == _BlockInfo + +class _ExternCInfo(_BlockInfo): + """Stores information about an 'extern "C"' block.""" + + def __init__(self, linenum): + _BlockInfo.__init__(self, linenum, True) + +class _ClassInfo(_BlockInfo): + """Stores information about a class.""" + + def __init__(self, name, class_or_struct, clean_lines, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name + self.is_derived = False + self.check_namespace_indentation = True + if class_or_struct == 'struct': + self.access = 'public' + self.is_struct = True + else: + self.access = 'private' + self.is_struct = False + + # Remember initial indentation level for this class. Using raw_lines here + # instead of elided to account for leading comments. + self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) + + # Try to find the end of the class. This will be confused by things like: + # class A { + # } *x = { ... + # + # But it's still good enough for CheckSectionSpacing. 
+ self.last_line = 0 + depth = 0 + for i in range(linenum, clean_lines.NumLines()): + line = clean_lines.elided[i] + depth += line.count('{') - line.count('}') + if not depth: + self.last_line = i + break + + def CheckBegin(self, filename, clean_lines, linenum, error): + # Look for a bare ':' + if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): + self.is_derived = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + # If there is a DISALLOW macro, it should appear near the end of + # the class. + seen_last_thing_in_class = False + for i in xrange(linenum - 1, self.starting_linenum, -1): + match = Search( + r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + + self.name + r'\)', + clean_lines.elided[i]) + if match: + if seen_last_thing_in_class: + error(filename, i, 'readability/constructors', 3, + match.group(1) + ' should be the last thing in the class') + break + + if not Match(r'^\s*$', clean_lines.elided[i]): + seen_last_thing_in_class = True + + # Check that closing brace is aligned with beginning of the class. + # Only do this if the closing brace is indented by only whitespaces. + # This means we will not check single-line class definitions. + indent = Match(r'^( *)\}', clean_lines.elided[linenum]) + if indent and len(indent.group(1)) != self.class_indent: + if self.is_struct: + parent = 'struct ' + self.name + else: + parent = 'class ' + self.name + error(filename, linenum, 'whitespace/indent', 3, + 'Closing brace should be aligned with beginning of %s' % parent) + +class _NamespaceInfo(_BlockInfo): + """Stores information about a namespace.""" + + def __init__(self, name, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name or '' + self.check_namespace_indentation = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Check end of namespace comments.""" + line = clean_lines.raw_lines[linenum] + + # Check how many lines is enclosed in this namespace. Don't issue + # warning for missing namespace comments if there aren't enough + # lines. However, do apply checks if there is already an end of + # namespace comment and it's incorrect. + # + # TODO(unknown): We always want to check end of namespace comments + # if a namespace is large, but sometimes we also want to apply the + # check if a short namespace contained nontrivial things (something + # other than forward declarations). There is currently no logic on + # deciding what these nontrivial things are, so this check is + # triggered by namespace size only, which works most of the time. + if (linenum - self.starting_linenum < 10 + and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): + return + + # Look for matching comment at end of namespace. + # + # Note that we accept C style "/* */" comments for terminating + # namespaces, so that code that terminate namespaces inside + # preprocessor macros can be cpplint clean. + # + # We also accept stuff like "// end of namespace ." with the + # period at the end. + # + # Besides these, we don't accept anything else, otherwise we might + # get false negatives when existing comment is a substring of the + # expected namespace. 
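+ # Illustrative endings that the patterns below accept for a namespace named
+ # "foo" (assumed examples, not an exhaustive list):
+ #   }  // namespace foo
+ #   }  /* namespace foo */
+ #   }  // end of namespace foo.
+ # A bare "}" closing a namespace that spans ten or more lines triggers the
+ # warning.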
+ if self.name: + # Named namespace + if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + + re.escape(self.name) + r'[\*/\.\\\s]*$'), + line): + error(filename, linenum, 'readability/namespace', 5, + 'Namespace should be terminated with "// namespace %s"' % + self.name) + else: + # Anonymous namespace + if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): + # If "// namespace anonymous" or "// anonymous namespace (more text)", + # mention "// anonymous namespace" as an acceptable form + if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"' + ' or "// anonymous namespace"') + else: + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"') + +class _PreprocessorInfo(object): + """Stores checkpoints of nesting stacks when #if/#else is seen.""" + + def __init__(self, stack_before_if): + # The entire nesting stack before #if + self.stack_before_if = stack_before_if + + # The entire nesting stack up to #else + self.stack_before_else = [] + + # Whether we have already seen #else or #elif + self.seen_else = False + +class NestingState(object): + """Holds states related to parsing braces.""" + + def __init__(self): + # Stack for tracking all braces. An object is pushed whenever we + # see a "{", and popped when we see a "}". Only 3 types of + # objects are possible: + # - _ClassInfo: a class or struct. + # - _NamespaceInfo: a namespace. + # - _BlockInfo: some other type of block. + self.stack = [] + + # Top of the previous stack before each Update(). + # + # Because the nesting_stack is updated at the end of each line, we + # had to do some convoluted checks to find out what is the current + # scope at the beginning of the line. This check is simplified by + # saving the previous top of nesting stack. + # + # We could save the full stack, but we only need the top. Copying + # the full nesting stack would slow down cpplint by ~10%. + self.previous_stack_top = [] + + # Stack of _PreprocessorInfo objects. + self.pp_stack = [] + + def SeenOpenBrace(self): + """Check if we have seen the opening brace for the innermost block. + + Returns: + True if we have seen the opening brace, False if the innermost + block is still expecting an opening brace. + """ + return (not self.stack) or self.stack[-1].seen_open_brace + + def InNamespaceBody(self): + """Check if we are currently one level inside a namespace body. + + Returns: + True if top of the stack is a namespace block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _NamespaceInfo) + + def InExternC(self): + """Check if we are currently one level inside an 'extern "C"' block. + + Returns: + True if top of the stack is an extern block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ExternCInfo) + + def InClassDeclaration(self): + """Check if we are currently one level inside a class or struct declaration. + + Returns: + True if top of the stack is a class/struct, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ClassInfo) + + def InAsmBlock(self): + """Check if we are currently one level inside an inline ASM block. + + Returns: + True if the top of the stack is a block containing inline ASM. 
+ """ + return self.stack and self.stack[-1].inline_asm != _NO_ASM + + def InTemplateArgumentList(self, clean_lines, linenum, pos): + """Check if current position is inside template argument list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: position just after the suspected template argument. + Returns: + True if (linenum, pos) is inside template arguments. + """ + while linenum < clean_lines.NumLines(): + # Find the earliest character that might indicate a template argument + line = clean_lines.elided[linenum] + match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) + if not match: + linenum += 1 + pos = 0 + continue + token = match.group(1) + pos += len(match.group(0)) + + # These things do not look like template argument list: + # class Suspect { + # class Suspect x; } + if token in ('{', '}', ';'): return False + + # These things look like template argument list: + # template + # template + # template + # template + if token in ('>', '=', '[', ']', '.'): return True + + # Check if token is an unmatched '<'. + # If not, move on to the next character. + if token != '<': + pos += 1 + if pos >= len(line): + linenum += 1 + pos = 0 + continue + + # We can't be sure if we just find a single '<', and need to + # find the matching '>'. + (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) + if end_pos < 0: + # Not sure if template argument list or syntax error in file + return False + linenum = end_line + pos = end_pos + return False + + def UpdatePreprocessor(self, line): + """Update preprocessor stack. + + We need to handle preprocessors due to classes like this: + #ifdef SWIG + struct ResultDetailsPageElementExtensionPoint { + #else + struct ResultDetailsPageElementExtensionPoint : public Extension { + #endif + + We make the following assumptions (good enough for most files): + - Preprocessor condition evaluates to true from #if up to first + #else/#elif/#endif. + + - Preprocessor condition evaluates to false from #else/#elif up + to #endif. We still perform lint checks on these lines, but + these do not affect nesting stack. + + Args: + line: current line to check. + """ + if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): + # Beginning of #if block, save the nesting stack here. The saved + # stack will allow us to restore the parsing state in the #else case. + self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) + elif Match(r'^\s*#\s*(else|elif)\b', line): + # Beginning of #else block + if self.pp_stack: + if not self.pp_stack[-1].seen_else: + # This is the first #else or #elif block. Remember the + # whole nesting stack up to this point. This is what we + # keep after the #endif. + self.pp_stack[-1].seen_else = True + self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) + + # Restore the stack to how it was before the #if + self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) + else: + # TODO(unknown): unexpected #else, issue warning? + pass + elif Match(r'^\s*#\s*endif\b', line): + # End of #if or #else blocks. + if self.pp_stack: + # If we saw an #else, we will need to restore the nesting + # stack to its former state before the #else, otherwise we + # will just continue from where we left off. + if self.pp_stack[-1].seen_else: + # Here we can just use a shallow copy since we are the last + # reference to it. 
+ self.stack = self.pp_stack[-1].stack_before_else
+ # Drop the corresponding #if
+ self.pp_stack.pop()
+ else:
+ # TODO(unknown): unexpected #endif, issue warning?
+ pass
+
+ # TODO(unknown): Update() is too long, but we will refactor later.
+ def Update(self, filename, clean_lines, linenum, error):
+ """Update nesting state with current line.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+ line = clean_lines.elided[linenum]
+
+ # Remember top of the previous nesting stack.
+ #
+ # The stack is always pushed/popped and not modified in place, so
+ # we can just do a shallow copy instead of copy.deepcopy. Using
+ # deepcopy would slow down cpplint by ~28%.
+ if self.stack:
+ self.previous_stack_top = self.stack[-1]
+ else:
+ self.previous_stack_top = None
+
+ # Update pp_stack
+ self.UpdatePreprocessor(line)
+
+ # Count parentheses. This is to avoid adding struct arguments to
+ # the nesting stack.
+ if self.stack:
+ inner_block = self.stack[-1]
+ depth_change = line.count('(') - line.count(')')
+ inner_block.open_parentheses += depth_change
+
+ # Also check if we are starting or ending an inline assembly block.
+ if inner_block.inline_asm in (_NO_ASM, _END_ASM):
+ if (depth_change != 0 and
+ inner_block.open_parentheses == 1 and
+ _MATCH_ASM.match(line)):
+ # Enter assembly block
+ inner_block.inline_asm = _INSIDE_ASM
+ else:
+ # Not entering assembly block. If previous line was _END_ASM,
+ # we will now shift to _NO_ASM state.
+ inner_block.inline_asm = _NO_ASM
+ elif (inner_block.inline_asm == _INSIDE_ASM and
+ inner_block.open_parentheses == 0):
+ # Exit assembly block
+ inner_block.inline_asm = _END_ASM
+
+ # Consume namespace declaration at the beginning of the line. Do
+ # this in a loop so that we catch same line declarations like this:
+ # namespace proto2 { namespace bridge { class MessageSet; } }
+ while True:
+ # Match start of namespace. The "\b\s*" below catches namespace
+ # declarations even if it weren't followed by a whitespace, this
+ # is so that we don't confuse our namespace checker. The
+ # missing spaces will be flagged by CheckSpacing.
+ namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
+ if not namespace_decl_match:
+ break
+
+ new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
+ self.stack.append(new_namespace)
+
+ line = namespace_decl_match.group(2)
+ if line.find('{') != -1:
+ new_namespace.seen_open_brace = True
+ line = line[line.find('{') + 1:]
+
+ # Look for a class declaration in whatever is left of the line
+ # after parsing namespaces. The regexp accounts for decorated classes
+ # such as in:
+ # class LOCKABLE API Object {
+ # };
+ class_decl_match = Match(
+ r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?'
+ r'(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))'
+ r'(.*)$', line)
+ if (class_decl_match and
+ (not self.stack or self.stack[-1].open_parentheses == 0)):
+ # We do not want to accept classes that are actually template arguments:
+ # template <class Ignore1,
+ # class Ignore2 = Default<Args>,
+ # template <typename> class Ignore3>
+ # void Function() {};
+ #
+ # To avoid template argument cases, we scan forward and look for
+ # an unmatched '>'. If we see one, assume we are inside a
+ # template argument list.
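+ # For illustration (assumed continuation line of a template header):
+ #   class U = DefaultPolicy>
+ # matches the class regexp above, but the '=' / unmatched '>' that follows
+ # makes InTemplateArgumentList() return True, so no _ClassInfo is pushed.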
+ end_declaration = len(class_decl_match.group(1)) + if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration): + self.stack.append(_ClassInfo( + class_decl_match.group(3), class_decl_match.group(2), + clean_lines, linenum)) + line = class_decl_match.group(4) + + # If we have not yet seen the opening brace for the innermost block, + # run checks here. + if not self.SeenOpenBrace(): + self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) + + # Update access control if we are inside a class/struct + if self.stack and isinstance(self.stack[-1], _ClassInfo): + classinfo = self.stack[-1] + access_match = Match( + r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' + r':(?:[^:]|$)', + line) + if access_match: + classinfo.access = access_match.group(2) + + # Check that access keywords are indented +1 space. Skip this + # check if the keywords are not preceded by whitespaces. + indent = access_match.group(1) + if (len(indent) != classinfo.class_indent + 1 and + Match(r'^\s*$', indent)): + if classinfo.is_struct: + parent = 'struct ' + classinfo.name + else: + parent = 'class ' + classinfo.name + slots = '' + if access_match.group(3): + slots = access_match.group(3) + error(filename, linenum, 'whitespace/indent', 3, + '%s%s: should be indented +1 space inside %s' % ( + access_match.group(2), slots, parent)) + + # Consume braces or semicolons from what's left of the line + while True: + # Match first brace, semicolon, or closed parenthesis. + matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line) + if not matched: + break + + token = matched.group(1) + if token == '{': + # If namespace or class hasn't seen a opening brace yet, mark + # namespace/class head as complete. Push a new block onto the + # stack otherwise. + if not self.SeenOpenBrace(): + self.stack[-1].seen_open_brace = True + elif Match(r'^extern\s*"[^"]*"\s*\{', line): + self.stack.append(_ExternCInfo(linenum)) + else: + self.stack.append(_BlockInfo(linenum, True)) + if _MATCH_ASM.match(line): + self.stack[-1].inline_asm = _BLOCK_ASM + + elif token == ';' or token == ')': + # If we haven't seen an opening brace yet, but we already saw + # a semicolon, this is probably a forward declaration. Pop + # the stack for these. + # + # Similarly, if we haven't seen an opening brace yet, but we + # already saw a closing parenthesis, then these are probably + # function arguments with extra "class" or "struct" keywords. + # Also pop these stack for these. + if not self.SeenOpenBrace(): + self.stack.pop() + else: # token == '}' + # Perform end of block checks and pop the stack. + if self.stack: + self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) + self.stack.pop() + line = matched.group(2) + + def InnermostClass(self): + """Get class info on the top of the stack. + + Returns: + A _ClassInfo object if we are inside a class, or None otherwise. + """ + for i in range(len(self.stack), 0, -1): + classinfo = self.stack[i - 1] + if isinstance(classinfo, _ClassInfo): + return classinfo + return None + + def CheckCompletedBlocks(self, filename, error): + """Checks that all classes and namespaces have been completely parsed. + + Call this when all lines in a file have been processed. + Args: + filename: The name of the current file. + error: The function to call with any errors found. + """ + # Note: This test can result in false positives if #ifdef constructs + # get in the way of brace matching. See the testBuildClass test in + # cpplint_unittest.py for an example of this. 
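+ # Illustrative example (assumed): a file that ends with
+ #   namespace foo {
+ #   class Bar {
+ # and never closes either brace leaves both objects on the stack, so one
+ # build/namespaces and one build/class error are emitted below.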
+ for obj in self.stack:
+ if isinstance(obj, _ClassInfo):
+ error(filename, obj.starting_linenum, 'build/class', 5,
+ 'Failed to find complete declaration of class %s' %
+ obj.name)
+ elif isinstance(obj, _NamespaceInfo):
+ error(filename, obj.starting_linenum, 'build/namespaces', 5,
+ 'Failed to find complete declaration of namespace %s' %
+ obj.name)
+
+def CheckForNonStandardConstructs(filename, clean_lines, linenum,
+ nesting_state, error):
+ r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
+
+ Complain about several constructs which gcc-2 accepts, but which are
+ not standard C++. Warning about these in lint is one way to ease the
+ transition to new compilers.
+ - put storage class first (e.g. "static const" instead of "const static").
+ - "%lld" instead of %qd" in printf-type functions.
+ - "%1$d" is non-standard in printf-type functions.
+ - "\%" is an undefined character escape sequence.
+ - text after #endif is not allowed.
+ - invalid inner-style forward declaration.
+ - >? and <? operators, and their >?= and <?= cousins.
+
+ Additionally, check for constructor/destructor style violations and reference
+ members, as it is very convenient to do so while checking for
+ gcc-2 compliance.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ nesting_state: A NestingState instance which maintains information about
+ the current stack of nested blocks being parsed.
+ error: A callable to which errors are reported, which takes 4 arguments:
+ filename, line number, error level, and message
+ """
+
+ # Remove comments from the line, but leave in strings for now.
+ line = clean_lines.lines[linenum]
+
+ if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
+ error(filename, linenum, 'runtime/printf_format', 3,
+ '%q in format strings is deprecated. Use %ll instead.')
+
+ if Search(r'printf\s*\(.*".*%\d+\$', line):
+ error(filename, linenum, 'runtime/printf_format', 2,
+ '%N$ formats are unconventional. Try rewriting to avoid them.')
+
+ # Remove escaped backslashes before looking for undefined escape sequences.
+ line = line.replace('\\\\', '')
+
+ if Search(r'("|\').*\\(%|\[|\(|{)', line):
+ error(filename, linenum, 'build/printf_format', 3,
+ '%, [, (, and { are undefined character escapes. Unescape them.')
+
+ # For the rest, work with both comments and strings removed.
+ line = clean_lines.elided[linenum]
+
+ if Search(r'\b(const|volatile|void|char|short|int|long'
+ r'|float|double|signed|unsigned'
+ r'|schar|u?int8|u?int16|u?int32|u?int64)'
+ r'\s+(register|static|extern|typedef)\b',
+ line):
+ error(filename, linenum, 'build/storage_class', 5,
+ 'Storage-class specifier (static, extern, typedef, etc) should be '
+ 'at the beginning of the declaration.')
+
+ if Match(r'\s*#\s*endif\s*[^/\s]+', line):
+ error(filename, linenum, 'build/endif_comment', 5,
+ 'Uncommented text after #endif is non-standard. Use a comment.')
+
+ if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
+ error(filename, linenum, 'build/forward_decl', 5,
+ 'Inner-style forward declarations are invalid. Remove this line.')
+
+ if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
+ line):
+ error(filename, linenum, 'build/deprecated', 3,
+ '>? and <? (max and min) operators are non-standard and deprecated.')
+
+ if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
+ # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
+ error(filename, linenum, 'runtime/member_string_references', 2,
+ 'const string& members are dangerous. It is much better to use '
+ 'alternatives, such as pointers or simple constants.')
+
+ # Everything else in this function operates on class declarations.
+ # Return early if the top of the nesting stack is not a class, or if
+ # the class head is not completed yet.
+ classinfo = nesting_state.InnermostClass()
+ if not classinfo or not classinfo.seen_open_brace:
+ return
+
+ # The class may have been declared with namespace or classname qualifiers.
+ # The constructor and destructor will not have those qualifiers.
+ base_classname = classinfo.name.split('::')[-1]
+
+ # Look for single-argument constructors that aren't marked explicit.
+ # Technically a valid construct, but against style.
+ explicit_constructor_match = Match(
+ r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?'
+ r'(?:(?:inline|constexpr)\s+)*%s\s*'
+ r'\(((?:[^()]|\([^()]*\))*)\)'
+ % re.escape(base_classname),
+ line)
+
+ if explicit_constructor_match:
+ is_marked_explicit = explicit_constructor_match.group(1)
+
+ if not explicit_constructor_match.group(2):
+ constructor_args = []
+ else:
+ constructor_args = explicit_constructor_match.group(2).split(',')
+
+ # collapse arguments so that commas in template parameter lists and function
+ # argument parameter lists don't split arguments in two
+ i = 0
+ while i < len(constructor_args):
+ constructor_arg = constructor_args[i]
+ while (constructor_arg.count('<') > constructor_arg.count('>') or
+ constructor_arg.count('(') > constructor_arg.count(')')):
+ constructor_arg += ',' + constructor_args[i + 1]
+ del constructor_args[i + 1]
+ constructor_args[i] = constructor_arg
+ i += 1
+
+ variadic_args = [arg for arg in constructor_args if '&&...'
in arg] + defaulted_args = [arg for arg in constructor_args if '=' in arg] + noarg_constructor = (not constructor_args or # empty arg list + # 'void' arg specifier + (len(constructor_args) == 1 and + constructor_args[0].strip() == 'void')) + onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg + not noarg_constructor) or + # all but at most one arg defaulted + (len(constructor_args) >= 1 and + not noarg_constructor and + len(defaulted_args) >= len(constructor_args) - 1) or + # variadic arguments with zero or one argument + (len(constructor_args) <= 2 and + len(variadic_args) >= 1)) + initializer_list_constructor = bool( + onearg_constructor and + Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) + copy_constructor = bool( + onearg_constructor and + Match(r'((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?' + r'%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' + % re.escape(base_classname), constructor_args[0].strip())) + + if (not is_marked_explicit and + onearg_constructor and + not initializer_list_constructor and + not copy_constructor): + if defaulted_args or variadic_args: + error(filename, linenum, 'runtime/explicit', 5, + 'Constructors callable with one argument ' + 'should be marked explicit.') + else: + error(filename, linenum, 'runtime/explicit', 5, + 'Single-parameter constructors should be marked explicit.') + elif is_marked_explicit and not onearg_constructor: + if noarg_constructor: + error(filename, linenum, 'runtime/explicit', 5, + 'Zero-parameter constructors should not be marked explicit.') + +def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): + """Checks for the correctness of various spacing around function calls. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Since function calls often occur inside if/for/while/switch + # expressions - which have their own, more liberal conventions - we + # first see if we should be looking inside such an expression for a + # function call, to which we can apply more strict standards. + fncall = line # if there's no control flow construct, look at whole line + for pattern in (r'\bif\s*\((.*)\)\s*{', + r'\bfor\s*\((.*)\)\s*{', + r'\bwhile\s*\((.*)\)\s*[{;]', + r'\bswitch\s*\((.*)\)\s*{'): + match = Search(pattern, line) + if match: + fncall = match.group(1) # look inside the parens for function calls + break + + # Except in if/for/while/switch, there should never be space + # immediately inside parens (eg "f( 3, 4 )"). We make an exception + # for nested parens ( (a+b) + c ). Likewise, there should never be + # a space before a ( when it's a function argument. I assume it's a + # function argument when the char before the whitespace is legal in + # a function name (alnum + _) and we're not starting a macro. Also ignore + # pointers and references to arrays and functions coz they're too tricky: + # we use a very simple way to recognize these: + # " (something)(maybe-something)" or + # " (something)(maybe-something," or + # " (something)[something]" + # Note that we assume the contents of [] to be short enough that + # they'll never need to wrap. + if ( # Ignore control structures. + not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b', + fncall) and + # Ignore pointers/references to functions. 
+ not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and + # Ignore pointers/references to arrays. + not Search(r' \([^)]+\)\[[^\]]+\]', fncall)): + if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space after ( in function call') + elif Search(r'\(\s+(?!(\s*\\)|\()', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space after (') + if (Search(r'\w\s+\(', fncall) and + not Search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and + not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and + not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and + not Search(r'\bcase\s+\(', fncall)): + # TODO(unknown): Space after an operator function seem to be a common + # error, silence those for now by restricting them to highest verbosity. + if Search(r'\boperator_*\b', line): + error(filename, linenum, 'whitespace/parens', 0, + 'Extra space before ( in function call') + else: + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space before ( in function call') + # If the ) is followed only by a newline or a { + newline, assume it's + # part of a control statement (if/while/etc), and don't complain + if Search(r'[^)]\s+\)\s*[^{\s]', fncall): + # If the closing parenthesis is preceded by only whitespaces, + # try to give a more descriptive error message. + if Search(r'^\s+\)', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Closing ) should be moved to the previous line') + else: + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space before )') + +def IsBlankLine(line): + """Returns true if the given line is blank. + + We consider a line to be blank if the line is empty or consists of + only white spaces. + + Args: + line: A line of a string. + + Returns: + True, if the given line is blank. + """ + return not line or line.isspace() + +def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error): + is_namespace_indent_item = ( + len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and + nesting_state.previous_stack_top == nesting_state.stack[-2]) + + if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + clean_lines.elided, line): + CheckItemIndentationInNamespace(filename, clean_lines.elided, + line, error) + +def CheckForFunctionLengths(filename, clean_lines, linenum, + function_state, error): + """Reports for long function bodies. + + For an overview why this is done, see: + https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions + + Uses a simplistic algorithm assuming other style guidelines + (especially spacing) are followed. + Only checks unindented functions, so class members are unchecked. + Trivial bodies are unchecked, so constructors with huge initializer lists + may be missed. + Blank/comment lines are not counted so as to avoid encouraging the removal + of vertical space and comments just to get through a lint check. + NOLINT *on the last line of a function* disables this check. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + function_state: Current function name and lines in body so far. + error: The function to call with any errors found. 
+ """ + lines = clean_lines.lines + line = lines[linenum] + joined_line = '' + + starting_func = False + regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... + match_result = Match(regexp, line) + if match_result: + # If the name is all caps and underscores, figure it's a macro and + # ignore it, unless it's TEST or TEST_F. + function_name = match_result.group(1).split()[-1] + if function_name == 'TEST' or function_name == 'TEST_F' or ( + not Match(r'[A-Z_]+$', function_name)): + starting_func = True + + if starting_func: + body_found = False + for start_linenum in xrange(linenum, clean_lines.NumLines()): + start_line = lines[start_linenum] + joined_line += ' ' + start_line.lstrip() + if Search(r'(;|})', start_line): # Declarations and trivial functions + body_found = True + break # ... ignore + if Search(r'{', start_line): + body_found = True + function = Search(r'((\w|:)*)\(', line).group(1) + if Match(r'TEST', function): # Handle TEST... macros + parameter_regexp = Search(r'(\(.*\))', joined_line) + if parameter_regexp: # Ignore bad syntax + function += parameter_regexp.group(1) + else: + function += '()' + function_state.Begin(function) + break + if not body_found: + # No body for the function (or evidence of a non-function) was found. + error(filename, linenum, 'readability/fn_size', 5, + 'Lint failed to find start of function body.') + elif Match(r'^\}\s*$', line): # function end + function_state.Check(error, filename, linenum) + function_state.End() + elif not Match(r'^\s*$', line): + function_state.Count() # Count non-blank/non-comment lines. + +_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') + +def CheckComment(line, filename, linenum, next_line_start, error): + """Checks for common mistakes in comments. + + Args: + line: The line in question. + filename: The name of the current file. + linenum: The number of the line to check. + next_line_start: The first non-whitespace column of the next line. + error: The function to call with any errors found. + """ + commentpos = line.find('//') + if commentpos != -1: + # Check if the // may be in quotes. If so, ignore it + if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: + # Allow one space for new scopes, two spaces otherwise: + if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and + ((commentpos >= 1 and + line[commentpos-1] not in string.whitespace) or + (commentpos >= 2 and + line[commentpos-2] not in string.whitespace))): + error(filename, linenum, 'whitespace/comments', 2, + 'At least two spaces is best between code and comments') + + # Checks for common mistakes in TODO comments. + comment = line[commentpos:] + match = _RE_PATTERN_TODO.match(comment) + if match: + # One whitespace is correct; zero whitespace is handled elsewhere. 
+ leading_whitespace = match.group(1) + if len(leading_whitespace) > 1: + error(filename, linenum, 'whitespace/todo', 2, + 'Too many spaces before TODO') + + username = match.group(2) + if not username: + error(filename, linenum, 'readability/todo', 2, + 'Missing username in TODO; it should look like ' + '"// TODO(my_username): Stuff."') + + middle_whitespace = match.group(3) + # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison + if middle_whitespace != ' ' and middle_whitespace != '': + error(filename, linenum, 'whitespace/todo', 2, + 'TODO(my_username) should be followed by a space') + + # If the comment contains an alphanumeric character, there + # should be a space somewhere between it and the // unless + # it's a /// or //! Doxygen comment. + if (Match(r'//[^ ]*\w', comment) and + not Match(r'(///|//\!)(\s+|$)', comment)): + error(filename, linenum, 'whitespace/comments', 4, + 'Should have a space between // and comment') + +def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for the correctness of various spacing issues in the code. + + Things we check for: spaces around operators, spaces after + if/for/while/switch, no spaces around parens in function calls, two + spaces between code and comment, don't start a block with a blank + line, don't end a function with a blank line, don't add a blank line + after public/protected/private, don't have too many blank lines in a row. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw = clean_lines.lines_without_raw_strings + line = raw[linenum] + + # Before nixing comments, check if the line is blank for no good + # reason. This includes the first line after a block is opened, and + # blank lines at the end of a function (ie, right before a line like '}' + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. In other words, don't issue blank line warnings + # for this block: + # namespace { + # + # } + # + # A warning about missing end of namespace comments will be issued instead. + # + # Also skip blank line checks for 'extern "C"' blocks, which are formatted + # like namespaces. + if (IsBlankLine(line) and + not nesting_state.InNamespaceBody() and + not nesting_state.InExternC()): + elided = clean_lines.elided + prev_line = elided[linenum - 1] + prevbrace = prev_line.rfind('{') + # TODO(unknown): Don't complain if line before blank line, and line after, + # both start with alnums and are indented the same amount. + # This ignores whitespace at the start of a namespace block + # because those are not usually indented. + if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: + # OK, we have a blank line at the start of a code block. Before we + # complain, we check if it is an exception to the rule: The previous + # non-empty line has the parameters of a function header that are indented + # 4 spaces (because they did not fit in a 80 column line when placed on + # the same line as the function name). 
We also check for the case where + # the previous line is indented 6 spaces, which may happen when the + # initializers of a constructor do not fit into a 80 column line. + exception = False + if Match(r' {6}\w', prev_line): # Initializer list? + # We are looking for the opening column of initializer list, which + # should be indented 4 spaces to cause 6 space indentation afterwards. + search_position = linenum-2 + while (search_position >= 0 + and Match(r' {6}\w', elided[search_position])): + search_position -= 1 + exception = (search_position >= 0 + and elided[search_position][:5] == ' :') + else: + # Search for the function arguments or an initializer list. We use a + # simple heuristic here: If the line is indented 4 spaces; and we have a + # closing paren, without the opening paren, followed by an opening brace + # or colon (for initializer lists) we assume that it is the last line of + # a function header. If we have a colon indented 4 spaces, it is an + # initializer list. + exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', + prev_line) + or Match(r' {4}:', prev_line)) + + if not exception: + error(filename, linenum, 'whitespace/blank_line', 2, + 'Redundant blank line at the start of a code block ' + 'should be deleted.') + # Ignore blank lines at the end of a block in a long if-else + # chain, like this: + # if (condition1) { + # // Something followed by a blank line + # + # } else if (condition2) { + # // Something else + # } + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + if (next_line + and Match(r'\s*}', next_line) + and next_line.find('} else ') == -1): + error(filename, linenum, 'whitespace/blank_line', 3, + 'Redundant blank line at the end of a code block ' + 'should be deleted.') + + matched = Match(r'\s*(public|protected|private):', prev_line) + if matched: + error(filename, linenum, 'whitespace/blank_line', 3, + 'Do not leave a blank line after "%s:"' % matched.group(1)) + + # Next, check comments + next_line_start = 0 + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + next_line_start = len(next_line) - len(next_line.lstrip()) + CheckComment(line, filename, linenum, next_line_start, error) + + # get rid of comments and strings + line = clean_lines.elided[linenum] + + # You shouldn't have spaces before your brackets, except maybe after + # 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'. + if Search(r'\w\s+\[', line) and not Search(r'(?:auto&?|delete|return)\s+\[', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Extra space before [') + + # In range-based for, we wanted spaces before and after the colon, but + # not around "::" tokens that might appear. + if (Search(r'for *\(.*[^:]:[^: ]', line) or + Search(r'for *\(.*[^: ]:[^:]', line)): + error(filename, linenum, 'whitespace/forcolon', 2, + 'Missing space around colon in range-based for loop') + +def CheckOperatorSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around operators. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Don't try to do spacing checks for operator methods. Do this by + # replacing the troublesome characters with something else, + # preserving column position for all other characters. 
+ # + # The replacement is done repeatedly to avoid false positives from + # operators that call operators. + while True: + match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) + if match: + line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) + else: + break + + # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". + # Otherwise not. Note we only check for non-spaces on *both* sides; + # sometimes people put non-spaces on one side when aligning ='s among + # many lines (not that this is behavior that I approve of...) + if ((Search(r'[\w.]=', line) or + Search(r'=[\w.]', line)) + and not Search(r'\b(if|while|for) ', line) + # Operators taken from [lex.operators] in C++11 standard. + and not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) + and not Search(r'operator=', line)): + error(filename, linenum, 'whitespace/operators', 4, + 'Missing spaces around =') + + # It's ok not to have spaces around binary operators like + - * /, but if + # there's too little whitespace, we get concerned. It's hard to tell, + # though, so we punt on this one for now. TODO. + + # You should always have whitespace around binary operators. + # + # Check <= and >= first to avoid false positives with < and >, then + # check non-include lines for spacing around < and >. + # + # If the operator is followed by a comma, assume it's be used in a + # macro context and don't do any checks. This avoids false + # positives. + # + # Note that && is not included here. This is because there are too + # many false positives due to RValue references. + match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line) + if match: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around %s' % match.group(1)) + elif not Match(r'#.*include', line): + # Look for < that is not surrounded by spaces. This is only + # triggered if both sides are missing spaces, even though + # technically should should flag if at least one side is missing a + # space. This is done to avoid some false positives with shifts. + match = Match(r'^(.*[^\s<])<[^\s=<,]', line) + if match: + (_, _, end_pos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if end_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <') + + # Look for > that is not surrounded by spaces. Similar to the + # above, we only trigger if both sides are missing spaces to avoid + # false positives with shifts. + match = Match(r'^(.*[^-\s>])>[^\s=>,]', line) + if match: + (_, _, start_pos) = ReverseCloseExpression( + clean_lines, linenum, len(match.group(1))) + if start_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >') + + # We allow no-spaces around << when used like this: 10<<20, but + # not otherwise (particularly, not when used as streams) + # + # We also allow operators following an opening parenthesis, since + # those tend to be macros that deal with operators. + match = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line) + if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and + not (match.group(1) == 'operator' and match.group(2) == ';')): + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <<') + + # We allow no-spaces around >> for almost anything. This is because + # C++11 allows ">>" to close nested templates, which accounts for + # most cases when ">>" is not followed by a space. 
+ # + # We still warn on ">>" followed by alpha character, because that is + # likely due to ">>" being used for right shifts, e.g.: + # value >> alpha + # + # When ">>" is used to close templates, the alphanumeric letter that + # follows would be part of an identifier, and there should still be + # a space separating the template type and the identifier. + # type> alpha + match = Search(r'>>[a-zA-Z_]', line) + if match: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >>') + + # There shouldn't be space around unary operators + match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) + if match: + error(filename, linenum, 'whitespace/operators', 4, + 'Extra space for operator %s' % match.group(1)) + +def CheckParenthesisSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around parentheses. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # No spaces after an if, while, switch, or for + match = Search(r' (if\(|for\(|while\(|switch\()', line) + if match: + error(filename, linenum, 'whitespace/parens', 5, + 'Missing space before ( in %s' % match.group(1)) + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". + # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. + match = Search(r'\b(if|for|while|switch)\s*' + r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', + line) + if match: + if len(match.group(2)) != len(match.group(4)): + if not (match.group(3) == ';' and + len(match.group(2)) == 1 + len(match.group(4)) or + not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): + error(filename, linenum, 'whitespace/parens', 5, + 'Mismatching spaces inside () in %s' % match.group(1)) + if len(match.group(2)) not in [0, 1]: + error(filename, linenum, 'whitespace/parens', 5, + 'Should have zero or one spaces inside ( and ) in %s' % + match.group(1)) + +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] + + # You should always have a space after a comma (either as fn arg or operator) + # + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. + # + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. 
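+ # Illustrative lines and the result of the checks below (assumed examples):
+ #   f(a,b);                    -> "Missing space after ,"
+ #   bool operator,(T a, T b);  -> accepted ("operator,(" is rewritten to "F(")
+ #   x = 1;y = 2;               -> "Missing space after ;"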
+ if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and
+ Search(r',[^,\s]', raw[linenum])):
+ error(filename, linenum, 'whitespace/comma', 3,
+ 'Missing space after ,')
+
+ # You should always have a space after a semicolon
+ # except for a few corner cases
+ # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
+ # space after ;
+ if Search(r';[^\s};\\)/]', line):
+ error(filename, linenum, 'whitespace/semicolon', 3,
+ 'Missing space after ;')
+
+def _IsType(clean_lines, nesting_state, expr):
+ """Check if expression looks like a type name, returns true if so.
+
+ Args:
+ clean_lines: A CleansedLines instance containing the file.
+ nesting_state: A NestingState instance which maintains information about
+ the current stack of nested blocks being parsed.
+ expr: The expression to check.
+ Returns:
+ True, if token looks like a type.
+ """
+ # Keep only the last token in the expression
+ last_word = Match(r'^.*(\b\S+)$', expr)
+ if last_word:
+ token = last_word.group(1)
+ else:
+ token = expr
+
+ # Match native types and stdint types
+ if _TYPES.match(token):
+ return True
+
+ # Try a bit harder to match templated types. Walk up the nesting
+ # stack until we find something that resembles a typename
+ # declaration for what we are looking for.
+ typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
+ r'\b')
+ block_index = len(nesting_state.stack) - 1
+ while block_index >= 0:
+ if isinstance(nesting_state.stack[block_index], _NamespaceInfo):
+ return False
+
+ # Found where the opening brace is. We want to scan from this
+ # line up to the beginning of the function, minus a few lines.
+ # template <typename Type1,  // stop scanning here
+ # ...>
+ # class C
+ # : public ... {  // start scanning here
+ last_line = nesting_state.stack[block_index].starting_linenum
+
+ next_block_start = 0
+ if block_index > 0:
+ next_block_start = nesting_state.stack[block_index - 1].starting_linenum
+ first_line = last_line
+ while first_line >= next_block_start:
+ if clean_lines.elided[first_line].find('template') >= 0:
+ break
+ first_line -= 1
+ if first_line < next_block_start:
+ # Didn't find any "template" keyword before reaching the next block,
+ # there are probably no template things to check for this block
+ block_index -= 1
+ continue
+
+ # Look for typename in the specified range
+ for i in xrange(first_line, last_line + 1, 1):
+ if Search(typename_pattern, clean_lines.elided[i]):
+ return True
+ block_index -= 1
+
+ return False
+
+def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error):
+ """Checks for horizontal spacing near braces and semicolons.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ nesting_state: A NestingState instance which maintains information about
+ the current stack of nested blocks being parsed.
+ error: The function to call with any errors found.
+ """
+ line = clean_lines.elided[linenum]
+
+ # Except after an opening paren, or after another opening brace (in case of
+ # an initializer list, for instance), you should have spaces before your
+ # braces when they are delimiting blocks, classes, namespaces etc.
+ # And since you should never have braces at the beginning of a line,
+ # this is an easy test. Except that braces used for initialization don't
+ # follow the same rule; we often don't want spaces before those.
+ match = Match(r'^(.*[^ ({>]){', line)
+
+ if match:
+ # Try a bit harder to check for brace initialization. This
+ # happens in one of the following forms:
+ # Constructor() : initializer_list_{} { ... }
+ # Constructor{}.MemberFunction()
+ # Type variable{};
+ # FunctionCall(type{}, ...);
+ # LastArgument(..., type{});
+ # LOG(INFO) << type{} << " ...";
+ # map_of_type[{...}] = ...;
+ # ternary = expr ? new type{} : nullptr;
+ # OuterTemplate<InnerTemplateConstructor<Type>{}>
+ #
+ # We check for the character following the closing brace, and
+ # silence the warning if it's one of those listed above, i.e. "{.;,)<>]:".
+ #
+ # To account for nested initializer list, we allow any number of
+ # closing braces up to "{;,)<". We can't simply silence the
+ # warning on first sight of closing brace, because that would
+ # cause false negatives for things that are not initializer lists.
+ # Silence this: But not this:
+ # Outer{ if (...) {
+ # Inner{...} if (...){ // Missing space before {
+ # }; }
+ #
+ # There is a false negative with this approach if people inserted
+ # spurious semicolons, e.g. "if (cond){};", but we will catch the
+ # spurious semicolon with a separate check.
+ leading_text = match.group(1)
+ (endline, endlinenum, endpos) = CloseExpression(
+ clean_lines, linenum, len(match.group(1)))
+ trailing_text = ''
+ if endpos > -1:
+ trailing_text = endline[endpos:]
+ for offset in xrange(endlinenum + 1,
+ min(endlinenum + 3, clean_lines.NumLines() - 1)):
+ trailing_text += clean_lines.elided[offset]
+ # We also suppress warnings for `uint64_t{expression}` etc., as the style
+ # guide recommends brace initialization for integral types to avoid
+ # overflow/truncation.
+ if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text)
+ and not _IsType(clean_lines, nesting_state, leading_text)):
+ error(filename, linenum, 'whitespace/braces', 5,
+ 'Missing space before {')
+
+ # Make sure '} else {' has spaces.
+ if Search(r'}else', line):
+ error(filename, linenum, 'whitespace/braces', 5,
+ 'Missing space before else')
+
+ # You shouldn't have a space before a semicolon at the end of the line.
+ # There's a special case for "for" since the style guide allows space before
+ # the semicolon there.
+ if Search(r':\s*;\s*$', line):
+ error(filename, linenum, 'whitespace/semicolon', 5,
+ 'Semicolon defining empty statement. Use {} instead.')
+ elif Search(r'^\s*;\s*$', line):
+ error(filename, linenum, 'whitespace/semicolon', 5,
+ 'Line contains only semicolon. If this should be an empty statement, '
+ 'use {} instead.')
+ elif (Search(r'\s+;\s*$', line) and
+ not Search(r'\bfor\b', line)):
+ error(filename, linenum, 'whitespace/semicolon', 5,
+ 'Extra space before last semicolon. If this should be an empty '
+ 'statement, use {} instead.')
+
+def IsDecltype(clean_lines, linenum, column):
+ """Check if the token ending on (linenum, column) is decltype().
+
+ Args:
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: the number of the line to check.
+ column: end column of the token to check.
+ Returns:
+ True if this token is decltype() expression, False otherwise.
+ """
+ (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
+ if start_col < 0:
+ return False
+ if Search(r'\bdecltype\s*$', text[0:start_col]):
+ return True
+ return False
+
+def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
+ """Checks for additional blank line issues related to sections.
+
+ Currently the only thing checked here is blank line before protected/private.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ class_info: A _ClassInfo objects. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Skip checks if the class is small, where small means 25 lines or less. + # 25 lines seems like a good cutoff since that's the usual height of + # terminals, and any class that can't fit in one screen can't really + # be considered "small". + # + # Also skip checks if we are on the first line. This accounts for + # classes that look like + # class Foo { public: ... }; + # + # If we didn't find the end of the class, last_line would be zero, + # and the check will be skipped by the first condition. + if (class_info.last_line - class_info.starting_linenum <= 24 or + linenum <= class_info.starting_linenum): + return + + matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) + if matched: + # Issue warning if the line before public/protected/private was + # not a blank line, but don't do this if the previous line contains + # "class" or "struct". This can happen two ways: + # - We are at the beginning of the class. + # - We are forward-declaring an inner class that is semantically + # private, but needed to be public for implementation reasons. + # Also ignores cases where the previous line ends with a backslash as can be + # common when defining classes in C macros. + prev_line = clean_lines.lines[linenum - 1] + if (not IsBlankLine(prev_line) and + not Search(r'\b(class|struct)\b', prev_line) and + not Search(r'\\$', prev_line)): + # Try a bit harder to find the beginning of the class. This is to + # account for multi-line base-specifier lists, e.g.: + # class Derived + # : public Base { + end_class_head = class_info.starting_linenum + for i in range(class_info.starting_linenum, linenum): + if Search(r'\{\s*$', clean_lines.lines[i]): + end_class_head = i + break + if end_class_head < linenum - 1: + error(filename, linenum, 'whitespace/blank_line', 3, + '"%s:" should be preceded by a blank line' % matched.group(1)) + +def GetPreviousNonBlankLine(clean_lines, linenum): + """Return the most recent non-blank line and its line number. + + Args: + clean_lines: A CleansedLines instance containing the file contents. + linenum: The number of the line to check. + + Returns: + A tuple with two elements. The first element is the contents of the last + non-blank line before the current line, or the empty string if this is the + first non-blank line. The second is the line number of that line, or -1 + if this is the first non-blank line. + """ + + prevlinenum = linenum - 1 + while prevlinenum >= 0: + prevline = clean_lines.elided[prevlinenum] + if not IsBlankLine(prevline): # if not a blank line... + return (prevline, prevlinenum) + prevlinenum -= 1 + return ('', -1) + +def CheckBraces(filename, clean_lines, linenum, error): + """Looks for misplaced braces (e.g. at the end of line). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] # get rid of comments and strings + + if Match(r'\s*{\s*$', line): + # We allow an open brace to start a line in the case where someone is using + # braces in a block to explicitly create a new scope, which is commonly used + # to control the lifetime of stack-allocated variables. Braces are also + # used for brace initializers inside function calls. 
We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if (not Search(r'[,;:}{(]\s*$', prevline) and + not Match(r'\s*#', prevline) and + not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): + error(filename, linenum, 'whitespace/braces', 4, + '{ should almost always be at the end of the previous line') + + # An else clause should be on the same line as the preceding closing brace. + if Match(r'\s*else\b\s*(?:if\b|\{|$)', line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if Match(r'\s*}\s*$', prevline): + error(filename, linenum, 'whitespace/newline', 4, + 'An else should appear on the same line as the preceding }') + + # If braces come on one side of an else, they should be on both. + # However, we have to worry about "else if" that spans multiple lines! + if Search(r'else if\s*\(', line): # could be multi-line if + brace_on_left = bool(Search(r'}\s*else if\s*\(', line)) + # find the ( after the if + pos = line.find('else if') + pos = line.find('(', pos) + if pos > 0: + (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) + brace_on_right = endline[endpos:].find('{') != -1 + if brace_on_left != brace_on_right: # must be brace after if + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + + # Likewise, an else should never have the else clause on the same line + if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): + error(filename, linenum, 'whitespace/newline', 4, + 'Else clause should never be on same line as else (use 2 lines)') + + # In the same way, a do/while should never be on one line + if Match(r'\s*do [^\s{]', line): + error(filename, linenum, 'whitespace/newline', 4, + 'do/while clauses should not be on a single line') + + # Check single-line if/else bodies. The style guide says 'curly braces are not + # required for single-line statements'. We additionally allow multi-line, + # single statements, but we reject anything with more than one semicolon in + # it. This means that the first semicolon after the if should be at the end of + # its line, and the line after that should have an indent level equal to or + # lower than the if. We also check for ambiguous if/else nesting without + # braces. + if_else_match = Search(r'\b(if\s*(|constexpr)\s*\(|else\b)', line) + if if_else_match and not Match(r'\s*#', line): + if_indent = GetIndentLevel(line) + endline, endlinenum, endpos = line, linenum, if_else_match.end() + if_match = Search(r'\bif\s*(|constexpr)\s*\(', line) + if if_match: + # This could be a multiline if condition, so find the end first. + pos = if_match.end() - 1 + (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) + # Check for an opening brace, either directly after the if or on the next + # line. If found, this isn't a single-statement conditional. 
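+ # Illustrative cases for the logic below (assumed examples):
+ #   if (x) { foo(); bar(); }   -> skipped: '{' right after the condition
+ #   if (x) foo();              -> accepted, provided the next line is not
+ #                                 indented deeper than the if
+ #   if (x) foo(); bar();       -> "If/else bodies with multiple statements
+ #                                  require braces"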
+ if (not Match(r'\s*{', endline[endpos:]) + and not (Match(r'\s*$', endline[endpos:]) + and endlinenum < (len(clean_lines.elided) - 1) + and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): + while (endlinenum < len(clean_lines.elided) + and ';' not in clean_lines.elided[endlinenum][endpos:]): + endlinenum += 1 + endpos = 0 + if endlinenum < len(clean_lines.elided): + endline = clean_lines.elided[endlinenum] + # We allow a mix of whitespace and closing braces (e.g. for one-liner + # methods) and a single \ after the semicolon (for macros) + endpos = endline.find(';') + if not Match(r';[\s}]*(\\?)$', endline[endpos:]): + # Semicolon isn't the last character, there's something trailing. + # Output a warning if the semicolon is not contained inside + # a lambda expression. + if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', + endline): + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + elif endlinenum < len(clean_lines.elided) - 1: + # Make sure the next line is dedented + next_line = clean_lines.elided[endlinenum + 1] + next_indent = GetIndentLevel(next_line) + # With ambiguous nested if statements, this will error out on the + # if that *doesn't* match the else, regardless of whether it's the + # inner one or outer one. + if (if_match and Match(r'\s*else\b', next_line) + and next_indent != if_indent): + error(filename, linenum, 'readability/braces', 4, + 'Else clause should be indented at the same level as if. ' + 'Ambiguous nested if/else chains require braces.') + elif next_indent > if_indent: + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + +def CheckTrailingSemicolon(filename, clean_lines, linenum, error): + """Looks for redundant trailing semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] + + # Block bodies should not be followed by a semicolon. Due to C++11 + # brace initialization, there are more places where semicolons are + # required than not, so we explicitly list the allowed rules rather + # than listing the disallowed ones. These are the places where "};" + # should be replaced by just "}": + # 1. Some flavor of block following closing parenthesis: + # for (;;) {}; + # while (...) {}; + # switch (...) {}; + # Function(...) {}; + # if (...) {}; + # if (...) else if (...) {}; + # + # 2. else block: + # if (...) else {}; + # + # 3. const member function: + # Function(...) const {}; + # + # 4. Block following some statement: + # x = 42; + # {}; + # + # 5. Block at the beginning of a function: + # Function(...) { + # {}; + # } + # + # Note that naively checking for the preceding "{" will also match + # braces inside multi-dimensional arrays, but this is fine since + # that expression will not contain semicolons. + # + # 6. Block following another block: + # while (true) {} + # {}; + # + # 7. End of namespaces: + # namespace {}; + # + # These semicolons seems far more common than other kinds of + # redundant semicolons, possibly due to people converting classes + # to namespaces. For now we do not warn for this case. + # + # Try matching case 1 first. + match = Match(r'^(.*\)\s*)\{', line) + if match: + # Matched closing parenthesis (case 1). 
Check the token before the + # matching opening parenthesis, and don't warn if it looks like a + # macro. This avoids these false positives: + # - macro that defines a base class + # - multi-line macro that defines a base class + # - macro that defines the whole class-head + # + # But we still issue warnings for macros that we know are safe to + # warn, specifically: + # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P + # - TYPED_TEST + # - INTERFACE_DEF + # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: + # + # We implement a list of safe macros instead of a list of + # unsafe macros, even though the latter appears less frequently in + # google code and would have been easier to implement. This is because + # the downside for getting the allowed checks wrong means some extra + # semicolons, while the downside for getting disallowed checks wrong + # would result in compile errors. + # + # In addition to macros, we also don't want to warn on + # - Compound literals + # - Lambdas + # - alignas specifier with anonymous structs + # - decltype + closing_brace_pos = match.group(1).rfind(')') + opening_parenthesis = ReverseCloseExpression( + clean_lines, linenum, closing_brace_pos) + if opening_parenthesis[2] > -1: + line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] + macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) + func = Match(r'^(.*\])\s*$', line_prefix) + if ((macro and + macro.group(1) not in ( + 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', + 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', + 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or + (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or + Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or + Search(r'\bdecltype$', line_prefix) or + Search(r'\s+=\s*$', line_prefix)): + match = None + if (match and + opening_parenthesis[1] > 1 and + Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): + # Multi-line lambda-expression + match = None + + else: + # Try matching cases 2-3. + match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) + if not match: + # Try matching cases 4-6. These are always matched on separate lines. + # + # Note that we can't simply concatenate the previous line to the + # current line and do a single match, otherwise we may output + # duplicate warnings for the blank line case: + # if (cond) { + # // blank line + # } + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if prevline and Search(r'[;{}]\s*$', prevline): + match = Match(r'^(\s*)\{', line) + + # Check matching closing brace + if match: + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if endpos > -1 and Match(r'^\s*;', endline[endpos:]): + # Current {} pair is eligible for semicolon check, and we have found + # the redundant semicolon, output warning here. + # + # Note: because we are scanning forward for opening braces, and + # outputting warnings for the matching closing brace, if there are + # nested blocks with trailing semicolons, we will get the error + # messages in reversed order. 
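+ # Illustrative examples with hypothetical code (comment added for clarity;
+ # not part of upstream cpplint):
+ #   while (cond) { Work(); };   -> readability/braces: "You don't need a ;
+ #                                  after a }"
+ #   struct Foo { int x; };      -> ok (the semicolon is required here)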
+ + # We need to check the line forward for NOLINT + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, + error) + ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, + error) + + error(filename, endlinenum, 'readability/braces', 4, + "You don't need a ; after a }") + +def CheckEmptyBlockBody(filename, clean_lines, linenum, error): + """Look for empty loop/conditional body with only a single semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Search for loop keywords at the beginning of the line. Because only + # whitespaces are allowed before the keywords, this will also ignore most + # do-while-loops, since those lines should start with closing brace. + # + # We also check "if" blocks here, since an empty conditional block + # is likely an error. + line = clean_lines.elided[linenum] + matched = Match(r'\s*(for|while|if)\s*\(', line) + if matched: + # Find the end of the conditional expression. + (end_line, end_linenum, end_pos) = CloseExpression( + clean_lines, linenum, line.find('(')) + + # Output warning if what follows the condition expression is a semicolon. + # No warning for all other cases, including whitespace or newline, since we + # have a separate check for semicolons preceded by whitespace. + if end_pos >= 0 and Match(r';', end_line[end_pos:]): + if matched.group(1) == 'if': + error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, + 'Empty conditional bodies should use {}') + else: + error(filename, end_linenum, 'whitespace/empty_loop_body', 5, + 'Empty loop bodies should use {} or continue') + + # Check for if statements that have completely empty bodies (no comments) + # and no else clauses. + if end_pos >= 0 and matched.group(1) == 'if': + # Find the position of the opening { for the if statement. + # Return without logging an error if it has no brackets. + opening_linenum = end_linenum + opening_line_fragment = end_line[end_pos:] + # Loop until EOF or find anything that's not whitespace or opening {. + while not Search(r'^\s*\{', opening_line_fragment): + if Search(r'^(?!\s*$)', opening_line_fragment): + # Conditional has no brackets. + return + opening_linenum += 1 + if opening_linenum == len(clean_lines.elided): + # Couldn't find conditional's opening { or any code before EOF. + return + opening_line_fragment = clean_lines.elided[opening_linenum] + # Set opening_line (opening_line_fragment may not be entire opening line). + opening_line = clean_lines.elided[opening_linenum] + + # Find the position of the closing }. + opening_pos = opening_line_fragment.find('{') + if opening_linenum == end_linenum: + # We need to make opening_pos relative to the start of the entire line. + opening_pos += end_pos + (closing_line, closing_linenum, closing_pos) = CloseExpression( + clean_lines, opening_linenum, opening_pos) + if closing_pos < 0: + return + + # Now construct the body of the conditional. This consists of the portion + # of the opening line after the {, all lines until the closing line, + # and the portion of the closing line before the }. + if (clean_lines.raw_lines[opening_linenum] != + CleanseComments(clean_lines.raw_lines[opening_linenum])): + # Opening line ends with a comment, so conditional isn't empty. + return + if closing_linenum > opening_linenum: + # Opening line after the {. 
Ignore comments here since we checked above. + bodylist = list(opening_line[opening_pos+1:]) + # All lines until closing line, excluding closing line, with comments. + bodylist.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) + # Closing line before the }. Won't (and can't) have comments. + bodylist.append(clean_lines.elided[closing_linenum][:closing_pos-1]) + body = '\n'.join(bodylist) + else: + # If statement has brackets and fits on a single line. + body = opening_line[opening_pos+1:closing_pos-1] + + # Check if the body is empty + if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): + return + # The body is empty. Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while Search(r'^\s*$|^(?=\s*else)', current_line_fragment): + if Search(r'^(?=\s*else)', current_line_fragment): + # Found an else clause, so don't log an error. + return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. + error(filename, end_linenum, 'whitespace/empty_if_body', 4, + ('If statement had no body and no else clause')) + +def FindCheckMacro(line): + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. + matched = Match(r'^(.*\b' + macro + r'\s*)\(', line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) + +def CheckCheck(filename, clean_lines, linenum, error): + """Checks the use of CHECK and EXPECT macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return + + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression( + clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not Match(r'\s*;', last_line[end_pos:]): + return + + if linenum == end_line: + expression = lines[linenum][start_pos + 1:end_pos - 1] + else: + expression = lines[linenum][start_pos + 1:] + for i in xrange(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0:end_pos - 1] + + # Parse expression so that we can take parentheses into account. + # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. 
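+ # Illustrative examples with hypothetical code (comment added for clarity;
+ # not part of upstream cpplint):
+ #   CHECK(x == 42);        -> "Consider using CHECK_EQ instead of
+ #                              CHECK(a == b)"
+ #   CHECK((a < 4) == b);   -> no suggestion (parenthesized sub-expression)
+ #   CHECK(a == b);         -> no suggestion (neither operand is a literal)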
+ lhs = '' + rhs = '' + operator = None + while expression: + matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' + r'==|!=|>=|>|<=|<|\()(.*)$', expression) + if matched: + token = matched.group(1) + if token == '(': + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) + if end < 0: + return # Unmatched parenthesis + lhs += '(' + expression[0:end] + expression = expression[end:] + elif token in ('&&', '||'): + # Logical and/or operators. This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); + # + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. + matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) + if not matched: + matched = Match(r'^(\s*\S)(.*)$', expression) + if not matched: + break + lhs += matched.group(1) + expression = matched.group(2) + + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return + + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find('&&') > -1 or rhs.find('||') > -1: + return + + # At least one of the operands must be a constant literal. This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) + # + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). + lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if Match(match_constant, lhs) or Match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error(filename, linenum, 'readability/check', 2, + 'Consider using %s instead of %s(a %s b)' % ( + _CHECK_REPLACEMENT[check_macro][operator], + check_macro, operator)) + +def CheckAltTokens(filename, clean_lines, linenum, error): + """Check alternative keywords being used in boolean expressions. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Avoid preprocessor lines + if Match(r'^\s*#', line): + return + + # Last ditch effort to avoid multi-line comments. This will not help + # if the comment started before the current line or ended after the + # current line, but it catches most of the false positives. At least, + # it provides a way to workaround this warning for people who use + # multi-line comments in preprocessor macros. 
+ # + # TODO(unknown): remove this once cpplint has better support for + # multi-line comments. + if line.find('/*') >= 0 or line.find('*/') >= 0: + return + + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + error(filename, linenum, 'readability/alt_tokens', 2, + 'Use operator %s instead of %s' % ( + _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) + +def GetLineWidth(line): + """Determines the width of the line in column positions. + + Args: + line: A string, which may be a Unicode string. + + Returns: + The width of the line in column positions, accounting for Unicode + combining characters and wide characters. + """ + if isinstance(line, unicode): + width = 0 + for uc in unicodedata.normalize('NFC', line): + if unicodedata.east_asian_width(uc) in ('W', 'F'): + width += 2 + elif not unicodedata.combining(uc): + # Issue 337 + # https://mail.python.org/pipermail/python-list/2012-August/628809.html + if (sys.version_info.major, sys.version_info.minor) <= (3, 2): + # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81 + is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4 + # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564 + is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF + if not is_wide_build and is_low_surrogate: + width -= 1 + + width += 1 + return width + else: + return len(line) + +def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, + error): + """Checks rules from the 'C++ style rules' section of cppguide.html. + + Most of these rules are hard to test (naming, comment style), but we + do what we can. In particular we check for 2-space indents, line lengths, + tab usage, spaces inside code, etc. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw_lines = clean_lines.lines_without_raw_strings + line = raw_lines[linenum] + prev = raw_lines[linenum - 1] if linenum > 0 else '' + + if line.find('\t') != -1: + error(filename, linenum, 'whitespace/tab', 1, + 'Tab found; better to use spaces') + + # One or three blank spaces at the beginning of the line is weird; it's + # hard to reconcile that with 2-space indents. + # NOTE: here are the conditions rob pike used for his tests. 
Mine aren't + # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces + # if(RLENGTH > 20) complain = 0; + # if(match($0, " +(error|private|public|protected):")) complain = 0; + # if(match(prev, "&& *$")) complain = 0; + # if(match(prev, "\\|\\| *$")) complain = 0; + # if(match(prev, "[\",=><] *$")) complain = 0; + # if(match($0, " <<")) complain = 0; + # if(match(prev, " +for \\(")) complain = 0; + # if(prevodd && match(prevprev, " +for \\(")) complain = 0; + scope_or_label_pattern = r'\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$' + classinfo = nesting_state.InnermostClass() + initial_spaces = 0 + cleansed_line = clean_lines.elided[linenum] + while initial_spaces < len(line) and line[initial_spaces] == ' ': + initial_spaces += 1 + # There are certain situations we allow one space, notably for + # section labels, and also lines containing multi-line raw strings. + # We also don't check for lines that look like continuation lines + # (of lines ending in double quotes, commas, equals, or angle brackets) + # because the rules for how to indent those are non-trivial. + if (not Search(r'[",=><] *$', prev) and + (initial_spaces == 1 or initial_spaces == 3) and + not Match(scope_or_label_pattern, cleansed_line) and + not (clean_lines.raw_lines[linenum] != line and + Match(r'^\s*""', line))): + error(filename, linenum, 'whitespace/indent', 3, + 'Weird number of spaces at line-start. ' + 'Are you using a 2-space indent?') + + if line and line[-1].isspace(): + error(filename, linenum, 'whitespace/end_of_line', 4, + 'Line ends in whitespace. Consider deleting these extra spaces.') + + # Check if the line is a header guard. + is_header_guard = False + if IsHeaderExtension(file_extension): + cppvar = GetHeaderGuardCPPVariable(filename) + if (line.startswith('#ifndef %s' % cppvar) or + line.startswith('#define %s' % cppvar) or + line.startswith('#endif // %s' % cppvar)): + is_header_guard = True + # #include lines and header guards can be long, since there's no clean way to + # split them. + # + # URLs can be long too. It's possible to split these, but it makes them + # harder to cut&paste. + # + # The "$Id:...$" comment may also get very long without it being the + # developers fault. + # + # Doxygen documentation copying can get pretty long when using an overloaded + # function declaration + if (not line.startswith('#include') and not is_header_guard and + not Match(r'^\s*//.*http(s?)://\S*$', line) and + not Match(r'^\s*//\s*[^\s]*$', line) and + not Match(r'^// \$Id:.*#[0-9]+ \$$', line) and + not Match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)): + line_width = GetLineWidth(line) + if line_width > _line_length: + error(filename, linenum, 'whitespace/line_length', 2, + 'Lines should be <= %i characters long' % _line_length) + + if (cleansed_line.count(';') > 1 and + # allow simple single line lambdas + not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}', + line) and + # for loops are allowed two ;'s (and may run over two lines). 
+ cleansed_line.find('for') == -1 and + (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or + GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and + # It's ok to have many commands in a switch case that fits in 1 line + not ((cleansed_line.find('case ') != -1 or + cleansed_line.find('default:') != -1) and + cleansed_line.find('break;') != -1)): + error(filename, linenum, 'whitespace/newline', 0, + 'More than one command on the same line') + + # Some more style checks + CheckBraces(filename, clean_lines, linenum, error) + CheckTrailingSemicolon(filename, clean_lines, linenum, error) + CheckEmptyBlockBody(filename, clean_lines, linenum, error) + CheckSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckOperatorSpacing(filename, clean_lines, linenum, error) + CheckParenthesisSpacing(filename, clean_lines, linenum, error) + CheckCommaSpacing(filename, clean_lines, linenum, error) + CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) + CheckCheck(filename, clean_lines, linenum, error) + CheckAltTokens(filename, clean_lines, linenum, error) + classinfo = nesting_state.InnermostClass() + if classinfo: + CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) + +_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') +# Matches the first component of a filename delimited by -s and _s. That is: +# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' +_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') + +def _DropCommonSuffixes(filename): + """Drops common suffixes like _test.cc or -inl.h from filename. + + For example: + >>> _DropCommonSuffixes('foo/foo-inl.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/bar/foo.cc') + 'foo/bar/foo' + >>> _DropCommonSuffixes('foo/foo_internal.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') + 'foo/foo_unusualinternal' + + Args: + filename: The input filename. + + Returns: + The filename with the common suffix removed. + """ + for suffix in itertools.chain( + ('%s.%s' % (test_suffix.lstrip('_'), ext) + for test_suffix, ext in itertools.product(_test_suffixes, GetNonHeaderExtensions())), + ('%s.%s' % (suffix, ext) + for suffix, ext in itertools.product(['inl', 'imp', 'internal'], GetHeaderExtensions()))): + if (filename.endswith(suffix) and len(filename) > len(suffix) and + filename[-len(suffix) - 1] in ('-', '_')): + return filename[:-len(suffix) - 1] + return os.path.splitext(filename)[0] + +def _ClassifyInclude(fileinfo, include, used_angle_brackets, include_order="default"): + """Figures out what kind of header 'include' is. + + Args: + fileinfo: The current file cpplint is running over. A FileInfo instance. + include: The path to a #included file. + used_angle_brackets: True if the #include used <> rather than "". + include_order: "default" or other value allowed in program arguments + + Returns: + One of the _XXX_HEADER constants. 
+ + For example: + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) + _C_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) + _CPP_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst") + _OTHER_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) + _LIKELY_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), + ... 'bar/foo_other_ext.h', False) + _POSSIBLE_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) + _OTHER_HEADER + """ + # This is a list of all standard c++ header files, except + # those already checked for above. + is_cpp_header = include in _CPP_HEADERS + + # Mark include as C header if in list or in a known folder for standard-ish C headers. + is_std_c_header = (include_order == "default") or (include in _C_HEADERS + # additional linux glibc header folders + or Search(r'(?:%s)\/.*\.h' % "|".join(C_STANDARD_HEADER_FOLDERS), include)) + + # Headers with C++ extensions shouldn't be considered C system headers + is_system = used_angle_brackets and not os.path.splitext(include)[1] in ['.hpp', '.hxx', '.h++'] + + if is_system: + if is_cpp_header: + return _CPP_SYS_HEADER + if is_std_c_header: + return _C_SYS_HEADER + else: + return _OTHER_SYS_HEADER + + # If the target file and the include we're checking share a + # basename when we drop common extensions, and the include + # lives in . , then it's likely to be owned by the target file. + target_dir, target_base = ( + os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) + include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) + target_dir_pub = os.path.normpath(target_dir + '/../public') + target_dir_pub = target_dir_pub.replace('\\', '/') + if target_base == include_base and ( + include_dir == target_dir or + include_dir == target_dir_pub): + return _LIKELY_MY_HEADER + + # If the target and include share some initial basename + # component, it's possible the target is implementing the + # include, so it's allowed to be first, but we'll never + # complain if it's not there. + target_first_component = _RE_FIRST_COMPONENT.match(target_base) + include_first_component = _RE_FIRST_COMPONENT.match(include_base) + if (target_first_component and include_first_component and + target_first_component.group(0) == + include_first_component.group(0)): + return _POSSIBLE_MY_HEADER + + return _OTHER_HEADER + +def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): + """Check rules that are applicable to #include lines. + + Strings on #include lines are NOT removed from elided line, to make + certain tasks easier. However, to prevent false positives, checks + applicable to #include lines in CheckLanguage must be put here. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + include_state: An _IncludeState instance in which the headers are inserted. + error: The function to call with any errors found. + """ + fileinfo = FileInfo(filename) + line = clean_lines.lines[linenum] + + # "include" should use the new style "foo/bar.h" instead of just "bar.h" + # Only do this check if the included header follows google naming + # conventions. If not, assume that it's a 3rd party API that + # requires special include conventions. + # + # We also make an exception for Lua headers, which follow google + # naming convention but not the include convention. 
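+ # Illustrative examples with hypothetical headers (comment added for
+ # clarity; not part of upstream cpplint):
+ #   #include "bar.h"       -> build/include_subdir: "Include the directory
+ #                             when naming .h files"
+ #   #include "foo/bar.h"   -> ok
+ #   #include <string>      -> ok (angle-bracket include, checked for
+ #                             ordering below)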
+ match = Match(r'#include\s*"([^/]+\.h)"', line) + if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)): + error(filename, linenum, 'build/include_subdir', 4, + 'Include the directory when naming .h files') + + # we shouldn't include a file more than once. actually, there are a + # handful of instances where doing so is okay, but in general it's + # not. + match = _RE_PATTERN_INCLUDE.search(line) + if match: + include = match.group(2) + used_angle_brackets = (match.group(1) == '<') + duplicate_line = include_state.FindHeader(include) + if duplicate_line >= 0: + error(filename, linenum, 'build/include', 4, + '"%s" already included at %s:%s' % + (include, filename, duplicate_line)) + return + + for extension in GetNonHeaderExtensions(): + if (include.endswith('.' + extension) and + os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)): + error(filename, linenum, 'build/include', 4, + 'Do not include .' + extension + ' files from other packages') + return + + # We DO want to include a 3rd party looking header if it matches the + # filename. Otherwise we get an erroneous error "...should include its + # header" error later. + third_src_header = False + for ext in GetHeaderExtensions(): + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + '.' + ext + headername = FileInfo(headerfile).RepositoryName() + if headername in include or include in headername: + third_src_header = True + break + + if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include): + include_state.include_list[-1].append((include, linenum)) + + # We want to ensure that headers appear in the right order: + # 1) for foo.cc, foo.h (preferred location) + # 2) c system files + # 3) cpp system files + # 4) for foo.cc, foo.h (deprecated location) + # 5) other google headers + # + # We classify each include statement as one of those 5 types + # using a number of techniques. The include_state object keeps + # track of the highest type seen, and complains if we see a + # lower type after that. + error_message = include_state.CheckNextIncludeOrder( + _ClassifyInclude(fileinfo, include, used_angle_brackets, _include_order)) + if error_message: + error(filename, linenum, 'build/include_order', 4, + '%s. Should be: %s.h, c system, c++ system, other.' % + (error_message, fileinfo.BaseName())) + canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) + if not include_state.IsInAlphabeticalOrder( + clean_lines, linenum, canonical_include): + error(filename, linenum, 'build/include_alpha', 4, + 'Include "%s" not in alphabetical order' % include) + include_state.SetLastHeader(canonical_include) + +def _GetTextInside(text, start_pattern): + r"""Retrieves all the text between matching open and close parentheses. + + Given a string of lines and a regular expression string, retrieve all the text + following the expression and between opening punctuation symbols like + (, [, or {, and the matching close-punctuation symbol. This properly nested + occurrences of the punctuations, so for the text like + printf(a(), b(c())); + a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. + start_pattern must match string having an open punctuation symbol at the end. + + Args: + text: The lines to extract text. Its comments and strings must be elided. + It can be single line and can span multiple lines. + start_pattern: The regexp string indicating where to start extracting + the text. + Returns: + The extracted text. 
+ None if either the opening string or ending punctuation could not be found. + """ + # TODO(unknown): Audit cpplint.py to see what places could be profitably + # rewritten to use _GetTextInside (and use inferior regexp matching today). + + # Give opening punctuations to get the matching close-punctuations. + matching_punctuation = {'(': ')', '{': '}', '[': ']'} + closing_punctuation = set(itervalues(matching_punctuation)) + + # Find the position to start extracting text. + match = re.search(start_pattern, text, re.M) + if not match: # start_pattern not found in text. + return None + start_position = match.end(0) + + assert start_position > 0, ( + 'start_pattern must ends with an opening punctuation.') + assert text[start_position - 1] in matching_punctuation, ( + 'start_pattern must ends with an opening punctuation.') + # Stack of closing punctuations we expect to have in text after position. + punctuation_stack = [matching_punctuation[text[start_position - 1]]] + position = start_position + while punctuation_stack and position < len(text): + if text[position] == punctuation_stack[-1]: + punctuation_stack.pop() + elif text[position] in closing_punctuation: + # A closing punctuation without matching opening punctuations. + return None + elif text[position] in matching_punctuation: + punctuation_stack.append(matching_punctuation[text[position]]) + position += 1 + if punctuation_stack: + # Opening punctuations left without matching close-punctuations. + return None + # punctuations match. + return text[start_position:position - 1] + +# Patterns for matching call-by-reference parameters. +# +# Supports nested templates up to 2 levels deep using this messy pattern: +# < (?: < (?: < [^<>]* +# > +# | [^<>] )* +# > +# | [^<>] )* +# > +_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* +_RE_PATTERN_TYPE = ( + r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' + r'(?:\w|' + r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' + r'::)+') +# A call-by-reference parameter ends with '& identifier'. +_RE_PATTERN_REF_PARAM = re.compile( + r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' + r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') +# A call-by-const-reference parameter either ends with 'const& identifier' +# or looks like 'const type& identifier' when 'type' is atomic. +_RE_PATTERN_CONST_REF_PARAM = ( + r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + + r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') +# Stream types. +_RE_PATTERN_REF_STREAM_PARAM = ( + r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') + +def CheckLanguage(filename, clean_lines, linenum, file_extension, + include_state, nesting_state, error): + """Checks rules from the 'C++ language rules' section of cppguide.html. + + Some of these rules are hard to test (function overloading, using + uint32 inappropriately), but we do the best we can. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + include_state: An _IncludeState instance in which the headers are inserted. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # If the line is empty or consists of entirely a comment, no need to + # check it. 
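+ # Illustrative examples of a few warnings emitted below, with hypothetical
+ # code (comment added for clarity; not part of upstream cpplint):
+ #   short port;           -> runtime/int: 'Use "unsigned short" for ports,
+ #                            not "short"'
+ #   long value;           -> runtime/int: 'Use int16/int64/etc, rather than
+ #                            the C type long'
+ #   unsigned short port;  -> ok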
+ line = clean_lines.elided[linenum] + if not line: + return + + match = _RE_PATTERN_INCLUDE.search(line) + if match: + CheckIncludeLine(filename, clean_lines, linenum, include_state, error) + return + + # Reset include state across preprocessor directives. This is meant + # to silence warnings for conditional includes. + match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) + if match: + include_state.ResetSection(match.group(1)) + + # Perform other checks now that we are sure that this is not an include line + CheckCasts(filename, clean_lines, linenum, error) + CheckGlobalStatic(filename, clean_lines, linenum, error) + CheckPrintf(filename, clean_lines, linenum, error) + + if IsHeaderExtension(file_extension): + # TODO(unknown): check that 1-arg constructors are explicit. + # How to tell it's a constructor? + # (handled in CheckForNonStandardConstructs for now) + # TODO(unknown): check that classes declare or disable copy/assign + # (level 1 error) + pass + + # Check if people are using the verboten C basic types. The only exception + # we regularly allow is "unsigned short port" for port. + if Search(r'\bshort port\b', line): + if not Search(r'\bunsigned short port\b', line): + error(filename, linenum, 'runtime/int', 4, + 'Use "unsigned short" for ports, not "short"') + else: + match = Search(r'\b(short|long(?! +double)|long long)\b', line) + if match: + error(filename, linenum, 'runtime/int', 4, + 'Use int16/int64/etc, rather than the C type %s' % match.group(1)) + + # Check if some verboten operator overloading is going on + # TODO(unknown): catch out-of-line unary operator&: + # class X {}; + # int operator&(const X& x) { return 42; } // unary operator& + # The trick is it's hard to tell apart from binary operator&: + # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& + if Search(r'\boperator\s*&\s*\(\s*\)', line): + error(filename, linenum, 'runtime/operator', 4, + 'Unary operator& is dangerous. Do not use it.') + + # Check for suspicious usage of "if" like + # } if (a == b) { + if Search(r'\}\s*if\s*\(', line): + error(filename, linenum, 'readability/braces', 4, + 'Did you mean "else if"? If not, start a new line for "if".') + + # Check for potential format string bugs like printf(foo). + # We constrain the pattern not to pick things like DocidForPrintf(foo). + # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) + # TODO(unknown): Catch the following case. Need to change the calling + # convention of the whole function to process multiple line to handle it. + # printf( + # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); + printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') + if printf_args: + match = Match(r'([\w.\->()]+)$', printf_args) + if match and match.group(1) != '__VA_ARGS__': + function_name = re.search(r'\b((?:string)?printf)\s*\(', + line, re.I).group(1) + error(filename, linenum, 'runtime/printf', 4, + 'Potential format string bug. Do %s("%%s", %s) instead.' + % (function_name, match.group(1))) + + # Check for potential memset bugs like memset(buf, sizeof(buf), 0). + match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) + if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): + error(filename, linenum, 'runtime/memset', 4, + 'Did you mean "memset(%s, 0, %s)"?' 
+ % (match.group(1), match.group(2))) + + if Search(r'\busing namespace\b', line): + if Search(r'\bliterals\b', line): + error(filename, linenum, 'build/namespaces_literals', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + else: + error(filename, linenum, 'build/namespaces', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + + # Detect variable-length arrays. + match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) + if (match and match.group(2) != 'return' and match.group(2) != 'delete' and + match.group(3).find(']') == -1): + # Split the size using space and arithmetic operators as delimiters. + # If any of the resulting tokens are not compile time constants then + # report the error. + tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) + is_const = True + skip_next = False + for tok in tokens: + if skip_next: + skip_next = False + continue + + if Search(r'sizeof\(.+\)', tok): continue + if Search(r'arraysize\(\w+\)', tok): continue + + tok = tok.lstrip('(') + tok = tok.rstrip(')') + if not tok: continue + if Match(r'\d+', tok): continue + if Match(r'0[xX][0-9a-fA-F]+', tok): continue + if Match(r'k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue + # A catch all for tricky sizeof cases, including 'sizeof expression', + # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' + # requires skipping the next token because we split on ' ' and '*'. + if tok.startswith('sizeof'): + skip_next = True + continue + is_const = False + break + if not is_const: + error(filename, linenum, 'runtime/arrays', 1, + 'Do not use variable-length arrays. Use an appropriately named ' + "('k' followed by CamelCase) compile-time constant for the size.") + + # Check for use of unnamed namespaces in header files. Registration + # macros are typically OK, so we allow use of "namespace {" on lines + # that end with backslashes. + if (IsHeaderExtension(file_extension) + and Search(r'\bnamespace\s*{', line) + and line[-1] != '\\'): + error(filename, linenum, 'build/namespaces_headers', 4, + 'Do not use unnamed namespaces in header files. See ' + 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' + ' for more information.') + +def CheckGlobalStatic(filename, clean_lines, linenum, error): + """Check for unsafe global or static objects. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Match two lines at a time to support multiline declarations + if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line): + line += clean_lines.elided[linenum + 1].strip() + + # Check for people declaring static/global STL strings at the top level. + # This is dangerous because the C++ language does not guarantee that + # globals with constructors are initialized before the first access, and + # also because globals can be destroyed when some threads are still running. + # TODO(unknown): Generalize this to also find static unique_ptr instances. + # TODO(unknown): File bugs for clang-tidy to find these. + match = Match( + r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' + r'([a-zA-Z0-9_:]+)\b(.*)', + line) + + # Remove false positives: + # - String pointers (as opposed to values). 
+ # string *pointer + # const string *pointer + # string const *pointer + # string *const pointer + # + # - Functions and template specializations. + # string Function(... + # string Class::Method(... + # + # - Operators. These are matched separately because operator names + # cross non-word boundaries, and trying to match both operators + # and functions at the same time would decrease accuracy of + # matching identifiers. + # string Class::operator*() + if (match and + not Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and + not Search(r'\boperator\W', line) and + not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): + if Search(r'\bconst\b', line): + error(filename, linenum, 'runtime/string', 4, + 'For a static/global string constant, use a C style string ' + 'instead: "%schar%s %s[]".' % + (match.group(1), match.group(2) or '', match.group(3))) + else: + error(filename, linenum, 'runtime/string', 4, + 'Static/global string variables are not permitted.') + + if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or + Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): + error(filename, linenum, 'runtime/init', 4, + 'You seem to be initializing a member variable with itself.') + +def CheckPrintf(filename, clean_lines, linenum, error): + """Check for printf related issues. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # When snprintf is used, the second argument shouldn't be a literal. + match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) + if match and match.group(2) != '0': + # If 2nd arg is zero, snprintf is used to calculate size. + error(filename, linenum, 'runtime/printf', 3, + 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' + 'to snprintf.' % (match.group(1), match.group(2))) + + # Check if some verboten C functions are being used. + if Search(r'\bsprintf\s*\(', line): + error(filename, linenum, 'runtime/printf', 5, + 'Never use sprintf. Use snprintf instead.') + match = Search(r'\b(strcpy|strcat)\s*\(', line) + if match: + error(filename, linenum, 'runtime/printf', 4, + 'Almost always, snprintf is better than %s' % match.group(1)) + +def IsDerivedFunction(clean_lines, linenum): + """Check if current line contains an inherited function. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains a function with "override" + virt-specifier. + """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i]) + if match: + # Look for "override" after the matching closing parenthesis + line, _, closing_paren = CloseExpression( + clean_lines, i, len(match.group(1))) + return (closing_paren >= 0 and + Search(r'\boverride\b', line[closing_paren:])) + return False + +def IsOutOfLineMethodDefinition(clean_lines, linenum): + """Check if current line contains an out-of-line method definition. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains an out-of-line method definition. 
+ """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]): + return Match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None + return False + +def IsInitializerList(clean_lines, linenum): + """Check if current line is inside constructor initializer list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line appears to be inside constructor initializer + list, False otherwise. + """ + for i in xrange(linenum, 1, -1): + line = clean_lines.elided[i] + if i == linenum: + remove_function_body = Match(r'^(.*)\{\s*$', line) + if remove_function_body: + line = remove_function_body.group(1) + + if Search(r'\s:\s*\w+[({]', line): + # A lone colon tend to indicate the start of a constructor + # initializer list. It could also be a ternary operator, which + # also tend to appear in constructor initializer lists as + # opposed to parameter lists. + return True + if Search(r'\}\s*,\s*$', line): + # A closing brace followed by a comma is probably the end of a + # brace-initialized member in constructor initializer list. + return True + if Search(r'[{};]\s*$', line): + # Found one of the following: + # - A closing brace or semicolon, probably the end of the previous + # function. + # - An opening brace, probably the start of current class or namespace. + # + # Current line is probably not inside an initializer list since + # we saw one of those things without seeing the starting colon. + return False + + # Got to the beginning of the file without seeing the start of + # constructor initializer list. + return False + +def CheckForNonConstReference(filename, clean_lines, linenum, + nesting_state, error): + """Check for non-const references. + + Separate from CheckLanguage since it scans backwards from current + line, instead of scanning forward. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # Do nothing if there is no '&' on current line. + line = clean_lines.elided[linenum] + if '&' not in line: + return + + # If a function is inherited, current function doesn't have much of + # a choice, so any non-const references should not be blamed on + # derived function. + if IsDerivedFunction(clean_lines, linenum): + return + + # Don't warn on out-of-line method definitions, as we would warn on the + # in-line declaration, if it isn't marked with 'override'. + if IsOutOfLineMethodDefinition(clean_lines, linenum): + return + + # Long type names may be broken across multiple lines, usually in one + # of these forms: + # LongType + # ::LongTypeContinued &identifier + # LongType:: + # LongTypeContinued &identifier + # LongType< + # ...>::LongTypeContinued &identifier + # + # If we detected a type split across two lines, join the previous + # line to current line so that we can match const references + # accordingly. + # + # Note that this only scans back one line, since scanning back + # arbitrary number of lines would be expensive. If you have a type + # that spans more than 2 lines, please use a typedef. 
+ if linenum > 1: + previous = None + if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): + # previous_line\n + ::current_line + previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', + clean_lines.elided[linenum - 1]) + elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): + # previous_line::\n + current_line + previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', + clean_lines.elided[linenum - 1]) + if previous: + line = previous.group(1) + line.lstrip() + else: + # Check for templated parameter that is split across multiple lines + endpos = line.rfind('>') + if endpos > -1: + (_, startline, startpos) = ReverseCloseExpression( + clean_lines, linenum, endpos) + if startpos > -1 and startline < linenum: + # Found the matching < on an earlier line, collect all + # pieces up to current line. + line = '' + for i in xrange(startline, linenum + 1): + line += clean_lines.elided[i].strip() + + # Check for non-const references in function parameters. A single '&' may + # found in the following places: + # inside expression: binary & for bitwise AND + # inside expression: unary & for taking the address of something + # inside declarators: reference parameter + # We will exclude the first two cases by checking that we are not inside a + # function body, including one that was just introduced by a trailing '{'. + # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. + if (nesting_state.previous_stack_top and + not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or + isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): + # Not at toplevel, not within a class, and not within a namespace + return + + # Avoid initializer lists. We only need to scan back from the + # current line for something that starts with ':'. + # + # We don't need to check the current line, since the '&' would + # appear inside the second set of parentheses on the current line as + # opposed to the first set. + if linenum > 0: + for i in xrange(linenum - 1, max(0, linenum - 10), -1): + previous_line = clean_lines.elided[i] + if not Search(r'[),]\s*$', previous_line): + break + if Match(r'^\s*:\s+\S', previous_line): + return + + # Avoid preprocessors + if Search(r'\\\s*$', line): + return + + # Avoid constructor initializer lists + if IsInitializerList(clean_lines, linenum): + return + + # We allow non-const references in a few standard places, like functions + # called "swap()" or iostream operators like "<<" or ">>". Do not check + # those function parameters. + # + # We also accept & in static_assert, which looks like a function but + # it's actually a declaration expression. + allowed_functions = (r'(?:[sS]wap(?:<\w:+>)?|' + r'operator\s*[<>][<>]|' + r'static_assert|COMPILE_ASSERT' + r')\s*\(') + if Search(allowed_functions, line): + return + elif not Search(r'\S+\([^)]*$', line): + # Don't see an allowed function on this line. Actually we + # didn't see any function name on this line, so this is likely a + # multi-line parameter list. Try a bit harder to catch this case. + for i in xrange(2): + if (linenum > i and + Search(allowed_functions, clean_lines.elided[linenum - i - 1])): + return + + decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body + for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): + if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and + not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)): + error(filename, linenum, 'runtime/references', 2, + 'Is this a non-const reference? 
' + 'If so, make const or use a pointer: ' + + ReplaceAll(' *<', '<', parameter)) + +def CheckCasts(filename, clean_lines, linenum, error): + """Various cast related checks. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Check to see if they're using an conversion function cast. + # I just try to capture the most common basic types, though there are more. + # Parameterless conversion functions, such as bool(), are allowed as they are + # probably a member operator declaration or default constructor. + match = Search( + r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b' + r'(int|float|double|bool|char|int32|uint32|int64|uint64)' + r'(\([^)].*)', line) + expecting_function = ExpectingFunctionArgs(clean_lines, linenum) + if match and not expecting_function: + matched_type = match.group(2) + + # matched_new_or_template is used to silence two false positives: + # - New operators + # - Template arguments with function types + # + # For template arguments, we match on types immediately following + # an opening bracket without any spaces. This is a fast way to + # silence the common case where the function type is the first + # template argument. False negative with less-than comparison is + # avoided because those operators are usually followed by a space. + # + # function // bracket + no space = false positive + # value < double(42) // bracket + space = true positive + matched_new_or_template = match.group(1) + + # Avoid arrays by looking for brackets that come after the closing + # parenthesis. + if Match(r'\([^()]+\)\s*\[', match.group(3)): + return + + # Other things to ignore: + # - Function pointers + # - Casts to pointer types + # - Placement new + # - Alias declarations + matched_funcptr = match.group(3) + if (matched_new_or_template is None and + not (matched_funcptr and + (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', + matched_funcptr) or + matched_funcptr.startswith('(*)'))) and + not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and + not Search(r'new\(\S+\)\s*' + matched_type, line)): + error(filename, linenum, 'readability/casting', 4, + 'Using deprecated casting style. ' + 'Use static_cast<%s>(...) instead' % + matched_type) + + if not expecting_function: + CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', + r'\((int|float|double|bool|char|u?int(16|32|64))\)', error) + + # This doesn't catch all cases. Consider (const char * const)"hello". + # + # (char *) "foo" should always be a const_cast (reinterpret_cast won't + # compile). + if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', + r'\((char\s?\*+\s?)\)\s*"', error): + pass + else: + # Check pointer casts for other than string constants + CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', + r'\((\w+\s?\*+\s?)\)', error) + + # In addition, we look for people taking the address of a cast. This + # is dangerous -- casts can assign to temporaries, so the pointer doesn't + # point where you think. + # + # Some non-identifier character is required before the '&' for the + # expression to be recognized as a cast. 
These are casts: + # expression = &static_cast(temporary()); + # function(&(int*)(temporary())); + # + # This is not a cast: + # reference_type&(int* function_param); + match = Search( + r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' + r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) + if match: + # Try a better error message when the & is bound to something + # dereferenced by the casted pointer, as opposed to the casted + # pointer itself. + parenthesis_error = False + match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) + if match: + _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) + if x1 >= 0 and clean_lines.elided[y1][x1] == '(': + _, y2, x2 = CloseExpression(clean_lines, y1, x1) + if x2 >= 0: + extended_line = clean_lines.elided[y2][x2:] + if y2 < clean_lines.NumLines() - 1: + extended_line += clean_lines.elided[y2 + 1] + if Match(r'\s*(?:->|\[)', extended_line): + parenthesis_error = True + + if parenthesis_error: + error(filename, linenum, 'readability/casting', 4, + ('Are you taking an address of something dereferenced ' + 'from a cast? Wrapping the dereferenced expression in ' + 'parentheses will make the binding more obvious')) + else: + error(filename, linenum, 'runtime/casting', 4, + ('Are you taking an address of a cast? ' + 'This is dangerous: could be a temp var. ' + 'Take the address before doing the cast, rather than after')) + +def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): + """Checks for a C-style cast by looking for the pattern. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + cast_type: The string for the C++ cast to recommend. This is either + reinterpret_cast, static_cast, or const_cast, depending. + pattern: The regular expression used to find C-style casts. + error: The function to call with any errors found. + + Returns: + True if an error was emitted. + False otherwise. + """ + line = clean_lines.elided[linenum] + match = Search(pattern, line) + if not match: + return False + + # Exclude lines with keywords that tend to look like casts + context = line[0:match.start(1) - 1] + if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): + return False + + # Try expanding current context to see if we one level of + # parentheses inside a macro. + if linenum > 0: + for i in xrange(linenum - 1, max(0, linenum - 5), -1): + context = clean_lines.elided[i] + context + if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): + return False + + # operator++(int) and operator--(int) + if context.endswith(' operator++') or context.endswith(' operator--'): + return False + + # A single unnamed argument for a function tends to look like old style cast. + # If we see those, don't issue warnings for deprecated casts. + remainder = line[match.end(0):] + if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', + remainder): + return False + + # At this point, all that should be left is actual casts. + error(filename, linenum, 'readability/casting', 4, + 'Using C-style cast. Use %s<%s>(...) instead' % + (cast_type, match.group(1))) + + return True + +def ExpectingFunctionArgs(clean_lines, linenum): + """Checks whether where function type arguments are expected. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. 
+
+  Returns:
+    True if the line at 'linenum' is inside something that expects arguments
+    of function types.
+  """
+  line = clean_lines.elided[linenum]
+  return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
+          (linenum >= 2 and
+           (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
+                  clean_lines.elided[linenum - 1]) or
+            Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
+                  clean_lines.elided[linenum - 2]) or
+            Search(r'\bstd::m?function\s*\<\s*$',
+                   clean_lines.elided[linenum - 1]))))
+
+_HEADERS_CONTAINING_TEMPLATES = (
+    ('<deque>', ('deque',)),
+    ('<functional>', ('unary_function', 'binary_function',
+                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
+                      'negate',
+                      'equal_to', 'not_equal_to', 'greater', 'less',
+                      'greater_equal', 'less_equal',
+                      'logical_and', 'logical_or', 'logical_not',
+                      'unary_negate', 'not1', 'binary_negate', 'not2',
+                      'bind1st', 'bind2nd',
+                      'pointer_to_unary_function',
+                      'pointer_to_binary_function',
+                      'ptr_fun',
+                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
+                      'mem_fun_ref_t',
+                      'const_mem_fun_t', 'const_mem_fun1_t',
+                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
+                      'mem_fun_ref',
+                      )),
+    ('<limits>', ('numeric_limits',)),
+    ('<list>', ('list',)),
+    ('<map>', ('multimap',)),
+    ('<memory>', ('allocator', 'make_shared', 'make_unique', 'shared_ptr',
+                  'unique_ptr', 'weak_ptr')),
+    ('<queue>', ('queue', 'priority_queue',)),
+    ('<set>', ('multiset',)),
+    ('<stack>', ('stack',)),
+    ('<string>', ('char_traits', 'basic_string',)),
+    ('<tuple>', ('tuple',)),
+    ('<unordered_map>', ('unordered_map', 'unordered_multimap')),
+    ('<unordered_set>', ('unordered_set', 'unordered_multiset')),
+    ('<utility>', ('pair',)),
+    ('<vector>', ('vector',)),
+
+    # gcc extensions.
+    # Note: std::hash is their hash, ::hash is our hash
+    ('<hash_map>', ('hash_map', 'hash_multimap',)),
+    ('<hash_set>', ('hash_set', 'hash_multiset',)),
+    ('<slist>', ('slist',)),
+    )
+
+_HEADERS_MAYBE_TEMPLATES = (
+    ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
+                     'transform',
+                     )),
+    ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
+    )
+
+_RE_PATTERN_STRING = re.compile(r'\bstring\b')
+
+_re_pattern_headers_maybe_templates = []
+for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
+  for _template in _templates:
+    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
+    # 'type::max()'.
+    _re_pattern_headers_maybe_templates.append(
+        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
+         _template,
+         _header))
+# Match set<type>, but not foo->set<type>, foo.set<type>
+_re_pattern_headers_maybe_templates.append(
+    (re.compile(r'[^>.]\bset\s*\<'),
+     'set<>',
+     '<set>'))
+# Match 'map<type> var' and 'std::map<type>(...)', but not 'map<type>(...)''
+_re_pattern_headers_maybe_templates.append(
+    (re.compile(r'(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)'),
+     'map<>',
+     '<map>'))
+
+# Other scripts may reach in and modify this pattern.
+_re_pattern_templates = []
+for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
+  for _template in _templates:
+    _re_pattern_templates.append(
+        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
+         _template + '<>',
+         _header))
+
+def FilesBelongToSameModule(filename_cc, filename_h):
+  """Check if these two filenames belong to the same module.
+
+  The concept of a 'module' here is as follows:
+  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
+  same 'module' if they are in the same directory.
+  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
+  to belong to the same module here.
+ + If the filename_cc contains a longer path than the filename_h, for example, + '/absolute/path/to/base/sysinfo.cc', and this file would include + 'base/sysinfo.h', this function also produces the prefix needed to open the + header. This is used by the caller of this function to more robustly open the + header file. We don't have access to the real include paths in this context, + so we need this guesswork here. + + Known bugs: tools/base/bar.cc and base/bar.h belong to the same module + according to this implementation. Because of this, this function gives + some false positives. This should be sufficiently rare in practice. + + Args: + filename_cc: is the path for the source (e.g. .cc) file + filename_h: is the path for the header path + + Returns: + Tuple with a bool and a string: + bool: True if filename_cc and filename_h belong to the same module. + string: the additional prefix needed to open the header file. + """ + fileinfo_cc = FileInfo(filename_cc) + if not fileinfo_cc.Extension().lstrip('.') in GetNonHeaderExtensions(): + return (False, '') + + fileinfo_h = FileInfo(filename_h) + if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')): + return (False, '') + + filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))] + matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName()) + if matched_test_suffix: + filename_cc = filename_cc[:-len(matched_test_suffix.group(1))] + + filename_cc = filename_cc.replace('/public/', '/') + filename_cc = filename_cc.replace('/internal/', '/') + + filename_h = filename_h[:-(len(fileinfo_h.Extension()))] + if filename_h.endswith('-inl'): + filename_h = filename_h[:-len('-inl')] + filename_h = filename_h.replace('/public/', '/') + filename_h = filename_h.replace('/internal/', '/') + + files_belong_to_same_module = filename_cc.endswith(filename_h) + common_path = '' + if files_belong_to_same_module: + common_path = filename_cc[:-len(filename_h)] + return files_belong_to_same_module, common_path + +def UpdateIncludeState(filename, include_dict, io=codecs): + """Fill up the include_dict with new includes found from the file. + + Args: + filename: the name of the header to read. + include_dict: a dictionary in which the headers are inserted. + io: The io factory to use to read the file. Provided for testability. + + Returns: + True if a header was successfully added. False otherwise. + """ + headerfile = None + try: + with io.open(filename, 'r', 'utf8', 'replace') as headerfile: + linenum = 0 + for line in headerfile: + linenum += 1 + clean_line = CleanseComments(line) + match = _RE_PATTERN_INCLUDE.search(clean_line) + if match: + include = match.group(2) + include_dict.setdefault(include, linenum) + return True + except IOError: + return False + +def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, + io=codecs): + """Reports for missing stl includes. + + This function will output warnings to make sure you are including the headers + necessary for the stl containers and functions that you use. We only give one + reason to include a header. For example, if you use both equal_to<> and + less<> in a .h file, only one (the latter in the file) of these will be + reported as a reason to include the . + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + include_state: An _IncludeState instance. + error: The function to call with any errors found. + io: The IO factory to use to read the header file. Provided for unittest + injection. 
+  """
+  required = {}  # A map of header name to linenumber and the template entity.
+                 # Example of required: { '<functional>': (1219, 'less<>') }
+
+  for linenum in xrange(clean_lines.NumLines()):
+    line = clean_lines.elided[linenum]
+    if not line or line[0] == '#':
+      continue
+
+    # String is special -- it is a non-templatized type in STL.
+    matched = _RE_PATTERN_STRING.search(line)
+    if matched:
+      # Don't warn about strings in non-STL namespaces:
+      # (We check only the first match per line; good enough.)
+      prefix = line[:matched.start()]
+      if prefix.endswith('std::') or not prefix.endswith('::'):
+        required['<string>'] = (linenum, 'string')
+
+    for pattern, template, header in _re_pattern_headers_maybe_templates:
+      if pattern.search(line):
+        required[header] = (linenum, template)
+
+    # The following function is just a speed up, no semantics are changed.
+    if not '<' in line:  # Reduces the cpu time usage by skipping lines.
+      continue
+
+    for pattern, template, header in _re_pattern_templates:
+      matched = pattern.search(line)
+      if matched:
+        # Don't warn about IWYU in non-STL namespaces:
+        # (We check only the first match per line; good enough.)
+        prefix = line[:matched.start()]
+        if prefix.endswith('std::') or not prefix.endswith('::'):
+          required[header] = (linenum, template)
+
+  # The policy is that if you #include something in foo.h you don't need to
+  # include it again in foo.cc. Here, we will look at possible includes.
+  # Let's flatten the include_state include_list and copy it into a dictionary.
+  include_dict = dict([item for sublist in include_state.include_list
+                       for item in sublist])
+
+  # Did we find the header for this file (if any) and successfully load it?
+  header_found = False
+
+  # Use the absolute path so that matching works properly.
+  abs_filename = FileInfo(filename).FullName()
+
+  # For Emacs's flymake.
+  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
+  # by flymake and that file name might end with '_flymake.cc'. In that case,
+  # restore original file name here so that the corresponding header file can be
+  # found.
+  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
+  # instead of 'foo_flymake.h'
+  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
+
+  # include_dict is modified during iteration, so we iterate over a copy of
+  # the keys.
+  header_keys = list(include_dict.keys())
+  for header in header_keys:
+    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
+    fullpath = common_path + header
+    if same_module and UpdateIncludeState(fullpath, include_dict, io):
+      header_found = True
+
+  # If we can't find the header file for a .cc, assume it's because we don't
+  # know where to look. In that case we'll give up as we're not sure they
+  # didn't include it in the .h file.
+  # TODO(unknown): Do a better job of finding .h files so we are confident that
+  # not having the .h file means there isn't one.
+  if not header_found:
+    for extension in GetNonHeaderExtensions():
+      if filename.endswith('.' + extension):
+        return
+
+  # All the lines have been processed, report the errors found.
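+  # For example, an entry { '<functional>': (1219, 'less<>') } whose header
+  # (stripped of '<>') does not appear in include_dict is reported below as:
+  #   "Add #include <functional> for less<>"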
+ for required_header_unstripped in sorted(required, key=required.__getitem__): + template = required[required_header_unstripped][1] + if required_header_unstripped.strip('<>"') not in include_dict: + error(filename, required[required_header_unstripped][0], + 'build/include_what_you_use', 4, + 'Add #include ' + required_header_unstripped + ' for ' + template) + +_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') + +def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): + """Check that make_pair's template arguments are deduced. + + G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are + specified explicitly, and such use isn't intended in any case. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) + if match: + error(filename, linenum, 'build/explicit_make_pair', + 4, # 4 = high confidence + 'For C++11-compatibility, omit template arguments from make_pair' + ' OR use pair directly OR if appropriate, construct a pair directly') + +def CheckRedundantVirtual(filename, clean_lines, linenum, error): + """Check if line contains a redundant "virtual" function-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for "virtual" on current line. + line = clean_lines.elided[linenum] + virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line) + if not virtual: return + + # Ignore "virtual" keywords that are near access-specifiers. These + # are only used in class base-specifier and do not apply to member + # functions. + if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or + Match(r'^\s+(public|protected|private)\b', virtual.group(3))): + return + + # Ignore the "virtual" keyword from virtual base classes. Usually + # there is a column on the same line in these cases (virtual base + # classes are rare in google3 because multiple inheritance is rare). + if Match(r'^.*[^:]:[^:].*$', line): return + + # Look for the next opening parenthesis. This is the start of the + # parameter list (possibly on the next line shortly after virtual). + # TODO(unknown): doesn't work if there are virtual functions with + # decltype() or other things that use parentheses, but csearch suggests + # that this is rare. + end_col = -1 + end_line = -1 + start_col = len(virtual.group(2)) + for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())): + line = clean_lines.elided[start_line][start_col:] + parameter_list = Match(r'^([^(]*)\(', line) + if parameter_list: + # Match parentheses to find the end of the parameter list + (_, end_line, end_col) = CloseExpression( + clean_lines, start_line, start_col + len(parameter_list.group(1))) + break + start_col = 0 + + if end_col < 0: + return # Couldn't find end of parameter list, give up + + # Look for "override" or "final" after the parameter list + # (possibly on the next few lines). 
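+  # For example, a declaration like
+  #   virtual void Refresh() override;
+  # is reported below, because "override" (or "final") already implies that
+  # the function is virtual.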
+ for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())): + line = clean_lines.elided[i][end_col:] + match = Search(r'\b(override|final)\b', line) + if match: + error(filename, linenum, 'readability/inheritance', 4, + ('"virtual" is redundant since function is ' + 'already declared as "%s"' % match.group(1))) + + # Set end_col to check whole lines after we are done with the + # first line. + end_col = 0 + if Search(r'[^\w]\s*$', line): + break + +def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): + """Check if line contains a redundant "override" or "final" virt-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for closing parenthesis nearby. We need one to confirm where + # the declarator ends and where the virt-specifier starts to avoid + # false positives. + line = clean_lines.elided[linenum] + declarator_end = line.rfind(')') + if declarator_end >= 0: + fragment = line[declarator_end:] + else: + if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: + fragment = line + else: + return + + # Check that at most one of "override" or "final" is present, not both + if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): + error(filename, linenum, 'readability/inheritance', 4, + ('"override" is redundant since function is ' + 'already declared as "final"')) + +# Returns true if we are at a new block, and it is directly +# inside of a namespace. +def IsBlockInNameSpace(nesting_state, is_forward_declaration): + """Checks that the new block is directly in a namespace. + + Args: + nesting_state: The _NestingState object that contains info about our state. + is_forward_declaration: If the class is a forward declared class. + Returns: + Whether or not the new block is directly in a namespace. + """ + if is_forward_declaration: + return len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo)) + + return (len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.stack[-2], _NamespaceInfo)) + +def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + raw_lines_no_comments, linenum): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. + """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, + linenum) + + if not (is_namespace_indent_item or is_forward_declaration): + return False + + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + + return IsBlockInNameSpace(nesting_state, is_forward_declaration) + +# Call this method if the line is directly inside of a namespace. +# If the line above is blank (excluding comments) or the start of +# an inner namespace, it cannot be indented. 
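+# For example, the check below warns about the indented declaration in:
+#   namespace demo {
+#   void ok();        // not indented: accepted
+#     void warned();  // indented: "Do not indent within a namespace"
+#   }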
+def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, + error): + line = raw_lines_no_comments[linenum] + if Match(r'^\s+', line): + error(filename, linenum, 'runtime/indentation_namespace', 4, + 'Do not indent within a namespace') + +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=None): + """Processes a single line in the file. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + clean_lines: An array of strings, each representing a line of the file, + with comments stripped. + line: Number of line being processed. + include_state: An _IncludeState instance in which the headers are inserted. + function_state: A _FunctionState instance which counts function lines, etc. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error) + if nesting_state.InAsmBlock(): return + CheckForFunctionLengths(filename, clean_lines, line, function_state, error) + CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage(filename, clean_lines, line, file_extension, include_state, + nesting_state, error) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) + CheckForNonStandardConstructs(filename, clean_lines, line, + nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) + CheckInvalidIncrement(filename, clean_lines, line, error) + CheckMakePairUsesDeduction(filename, clean_lines, line, error) + CheckRedundantVirtual(filename, clean_lines, line, error) + CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + if extra_check_functions: + for check_fn in extra_check_functions: + check_fn(filename, clean_lines, line, error) + +def FlagCxx11Features(filename, clean_lines, linenum, error): + """Flag those c++11 features that we only allow in certain places. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++ TR1 headers. + if include and include.group(1).startswith('tr1/'): + error(filename, linenum, 'build/c++tr1', 5, + ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1)) + + # Flag unapproved C++11 headers. 
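+  # e.g. `#include <mutex>` or `#include <thread>` in a checked file is
+  # reported below as an unapproved C++11 header (confidence 5).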
+ if include and include.group(1) in ('cfenv', + 'condition_variable', + 'fenv.h', + 'future', + 'mutex', + 'thread', + 'chrono', + 'ratio', + 'regex', + 'system_error', + ): + error(filename, linenum, 'build/c++11', 5, + ('<%s> is an unapproved C++11 header.') % include.group(1)) + + # The only place where we need to worry about C++11 keywords and library + # features in preprocessor directives is in macro definitions. + if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return + + # These are classes and free functions. The classes are always + # mentioned as std::*, but we only catch the free functions if + # they're not found by ADL. They're alphabetical by header. + for top_name in ( + # type_traits + 'alignment_of', + 'aligned_union', + ): + if Search(r'\bstd::%s\b' % top_name, line): + error(filename, linenum, 'build/c++11', 5, + ('std::%s is an unapproved C++11 class or function. Send c-style ' + 'an example of where it would make your code more readable, and ' + 'they may let you use it.') % top_name) + +def FlagCxx14Features(filename, clean_lines, linenum, error): + """Flag those C++14 features that we restrict. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++14 headers. + if include and include.group(1) in ('scoped_allocator', 'shared_mutex'): + error(filename, linenum, 'build/c++14', 5, + ('<%s> is an unapproved C++14 header.') % include.group(1)) + +def ProcessFileData(filename, file_extension, lines, error, + extra_check_functions=None): + """Performs lint checks and reports any errors to the given error function. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + lines = (['// marker so line numbers and indices both start at 1'] + lines + + ['// marker so line numbers end in a known way']) + + include_state = _IncludeState() + function_state = _FunctionState() + nesting_state = NestingState() + + ResetNolintSuppressions() + + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppresions(lines) + RemoveMultiLineComments(filename, lines, error) + clean_lines = CleansedLines(lines) + + if IsHeaderExtension(file_extension): + CheckForHeaderGuard(filename, clean_lines, error) + + for line in xrange(clean_lines.NumLines()): + ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions) + FlagCxx11Features(filename, clean_lines, line, error) + nesting_state.CheckCompletedBlocks(filename, error) + + CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + + # Check that the .cc file has included its header if it exists. 
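+  # e.g. when linting foo.cc, this step expects a matching foo.h (if such a
+  # header exists on disk) to be #included by foo.cc.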
+ if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + + # We check here rather than inside ProcessLine so that we see raw + # lines rather than "cleaned" lines. + CheckForBadCharacters(filename, lines, error) + + CheckForNewlineAtEOF(filename, lines, error) + +def ProcessConfigOverrides(filename): + """ Loads the configuration files and processes the config overrides. + + Args: + filename: The name of the file being processed by the linter. + + Returns: + False if the current |filename| should not be processed further. + """ + + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. + + cfg_file = os.path.join(abs_path, "CPPLINT.cfg") + abs_filename = abs_path + if not os.path.isfile(cfg_file): + continue + + try: + with open(cfg_file) as file_handle: + for line in file_handle: + line, _, _ = line.partition('#') # Remove comments. + if not line.strip(): + continue + + name, _, val = line.partition('=') + name = name.strip() + val = val.strip() + if name == 'set noparent': + keep_looking = False + elif name == 'filter': + cfg_filters.append(val) + elif name == 'exclude_files': + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". + if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + if _cpplint_state.quiet: + # Suppress "Ignoring file" warning when using --quiet. + return False + _cpplint_state.PrintInfo('Ignoring "%s": file excluded by "%s". ' + 'File path component "%s" matches ' + 'pattern "%s"\n' % + (filename, cfg_file, base_name, val)) + return False + elif name == 'linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + _cpplint_state.PrintError('Line length must be numeric.') + elif name == 'extensions': + ProcessExtensionsOption(val) + elif name == 'root': + global _root + # root directories are specified relative to CPPLINT.cfg dir. + _root = os.path.join(os.path.dirname(cfg_file), val) + elif name == 'headers': + ProcessHppHeadersOption(val) + elif name == 'includeorder': + ProcessIncludeOrderOption(val) + else: + _cpplint_state.PrintError( + 'Invalid configuration option (%s) in file %s\n' % + (name, cfg_file)) + + except IOError: + _cpplint_state.PrintError( + "Skipping config file '%s': Can't open for reading\n" % cfg_file) + keep_looking = False + + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for cfg_filter in reversed(cfg_filters): + _AddFilters(cfg_filter) + + return True + +def ProcessFile(filename, vlevel, extra_check_functions=None): + """Does google-lint on a single file. + + Args: + filename: The name of the file to parse. + + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. + + extra_check_functions: An array of additional check functions that will be + run on each source line. 
Each function takes 4 + arguments: filename, clean_lines, line, error + """ + + _SetVerboseLevel(vlevel) + _BackupFilters() + old_errors = _cpplint_state.error_count + + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return + + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. + if filename == '-': + lines = codecs.StreamReaderWriter(sys.stdin, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace').read().split('\n') + else: + with codecs.open(filename, 'r', 'utf8', 'replace') as target_file: + lines = target_file.read().split('\n') + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith('\r'): + lines[linenum] = lines[linenum].rstrip('\r') + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + _cpplint_state.PrintError( + "Skipping input '%s': Can't open for reading\n" % filename) + _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + file_extension = filename[filename.rfind('.') + 1:] + + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. + if filename != '-' and file_extension not in GetAllExtensions(): + _cpplint_state.PrintError('Ignoring %s; not a valid file name ' + '(%s)\n' % (filename, ', '.join(GetAllExtensions()))) + else: + ProcessFileData(filename, file_extension, lines, Error, + extra_check_functions) + + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. + # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. + for linenum in crlf_lines: + Error(filename, linenum, 'whitespace/newline', 1, + 'Unexpected \\r (^M) found; better to use only \\n') + + # Suppress printing anything if --quiet was passed unless the error + # count has increased after processing this file. + if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: + _cpplint_state.PrintInfo('Done processing %s\n' % filename) + _RestoreFilters() + +def PrintUsage(message): + """Prints a brief usage string and exits, optionally with an error message. + + Args: + message: The optional error message. 
+ """ + sys.stderr.write(_USAGE % (list(GetAllExtensions()), + ','.join(list(GetAllExtensions())), + GetHeaderExtensions(), + ','.join(GetHeaderExtensions()))) + + if message: + sys.exit('\nFATAL ERROR: ' + message) + else: + sys.exit(0) + +def PrintVersion(): + sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n') + sys.stdout.write('cpplint ' + __VERSION__ + '\n') + sys.stdout.write('Python ' + sys.version + '\n') + sys.exit(0) + +def PrintCategories(): + """Prints a list of all the error-categories used by error messages. + + These are the categories used to filter messages via --filter. + """ + sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) + sys.exit(0) + +def ParseArguments(args): + """Parses the command line arguments. + + This may set the output format and verbosity level as side-effects. + + Args: + args: The command line arguments: + + Returns: + The list of filenames to lint. + """ + try: + (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', + 'v=', + 'version', + 'counting=', + 'filter=', + 'root=', + 'repository=', + 'linelength=', + 'extensions=', + 'exclude=', + 'recursive', + 'headers=', + 'includeorder=', + 'quiet']) + except getopt.GetoptError: + PrintUsage('Invalid arguments.') + + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = '' + quiet = _Quiet() + counting_style = '' + recursive = False + + for (opt, val) in opts: + if opt == '--help': + PrintUsage(None) + if opt == '--version': + PrintVersion() + elif opt == '--output': + if val not in ('emacs', 'vs7', 'eclipse', 'junit'): + PrintUsage('The only allowed output formats are emacs, vs7, eclipse ' + 'and junit.') + output_format = val + elif opt == '--quiet': + quiet = True + elif opt == '--verbose' or opt == '--v': + verbosity = int(val) + elif opt == '--filter': + filters = val + if not filters: + PrintCategories() + elif opt == '--counting': + if val not in ('total', 'toplevel', 'detailed'): + PrintUsage('Valid counting options are total, toplevel, and detailed') + counting_style = val + elif opt == '--root': + global _root + _root = val + elif opt == '--repository': + global _repository + _repository = val + elif opt == '--linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + PrintUsage('Line length must be digits.') + elif opt == '--exclude': + global _excludes + if not _excludes: + _excludes = set() + _excludes.update(glob.glob(val)) + elif opt == '--extensions': + ProcessExtensionsOption(val) + elif opt == '--headers': + ProcessHppHeadersOption(val) + elif opt == '--recursive': + recursive = True + elif opt == '--includeorder': + ProcessIncludeOrderOption(val) + + if not filenames: + PrintUsage('No files were specified.') + + if recursive: + filenames = _ExpandDirectories(filenames) + + if _excludes: + filenames = _FilterExcludedFiles(filenames) + + _SetOutputFormat(output_format) + _SetQuiet(quiet) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + filenames.sort() + return filenames + +def _ExpandDirectories(filenames): + """Searches a list of filenames and replaces directories in the list with + all files descending from those directories. Files with extensions not in + the valid extensions list are excluded. 
+ + Args: + filenames: A list of files or directories + + Returns: + A list of all files that are members of filenames or descended from a + directory in filenames + """ + expanded = set() + for filename in filenames: + if not os.path.isdir(filename): + expanded.add(filename) + continue + + for root, _, files in os.walk(filename): + for loopfile in files: + fullname = os.path.join(root, loopfile) + if fullname.startswith('.' + os.path.sep): + fullname = fullname[len('.' + os.path.sep):] + expanded.add(fullname) + + filtered = [] + for filename in expanded: + if os.path.splitext(filename)[1][1:] in GetAllExtensions(): + filtered.append(filename) + return filtered + +def _FilterExcludedFiles(fnames): + """Filters out files listed in the --exclude command line switch. File paths + in the switch are evaluated relative to the current working directory + """ + exclude_paths = [os.path.abspath(f) for f in _excludes] + # because globbing does not work recursively, exclude all subpath of all excluded entries + return [f for f in fnames + if not any(e for e in exclude_paths + if _IsParentOrSame(e, os.path.abspath(f)))] + +def _IsParentOrSame(parent, child): + """Return true if child is subdirectory of parent. + Assumes both paths are absolute and don't contain symlinks. + """ + parent = os.path.normpath(parent) + child = os.path.normpath(child) + if parent == child: + return True + + prefix = os.path.commonprefix([parent, child]) + if prefix != parent: + return False + # Note: os.path.commonprefix operates on character basis, so + # take extra care of situations like '/foo/ba' and '/foo/bar/baz' + child_suffix = child[len(prefix):] + child_suffix = child_suffix.lstrip(os.sep) + return child == os.path.join(prefix, child_suffix) + +def main(): + filenames = ParseArguments(sys.argv[1:]) + backup_err = sys.stderr + try: + # Change stderr to write with replacement characters so we don't die + # if we try to print something containing non-ASCII characters. + sys.stderr = codecs.StreamReader(sys.stderr, 'replace') + + _cpplint_state.ResetErrorCounts() + for filename in filenames: + ProcessFile(filename, _cpplint_state.verbose_level) + # If --quiet is passed, suppress printing error count unless there are errors. + if not _cpplint_state.quiet or _cpplint_state.error_count > 0: + _cpplint_state.PrintErrorCounts() + + if _cpplint_state.output_format == 'junit': + sys.stderr.write(_cpplint_state.FormatJUnitXML()) + + finally: + sys.stderr = backup_err + + sys.exit(_cpplint_state.error_count > 0) + +if __name__ == '__main__': + main() diff --git a/tools/gdb/stubs.py b/tools/gdb/stubs.py new file mode 100644 index 0000000..fc43d9c --- /dev/null +++ b/tools/gdb/stubs.py @@ -0,0 +1,11 @@ + +import sys +# path to local stubs package +sys.path.insert(1, './tools/gdb/') +# path to checkout of qmp package 'https://pypi.org/project/qemu.qmp/' +sys.path.insert(1, '/proj/i4stubs/tools/python/') +try: + import stubs +except Exception as e: + print(f"could not load gdb stubs plugin: {str(e)}", file=sys.stderr) + pass diff --git a/tools/gdb/stubs/__init__.py b/tools/gdb/stubs/__init__.py new file mode 100644 index 0000000..bd5ee12 --- /dev/null +++ b/tools/gdb/stubs/__init__.py @@ -0,0 +1,176 @@ + +from . import monitor +from . import idt +from . import gdt +from . 
import paging + +import gdb +import traceback + +qemu = monitor.Monitor('qmp.sock') + +def _active_cr3(): + i = gdb.selected_inferior() + cr3_desc = i.architecture().registers().find('cr3') + cr3 = gdb.selected_frame().read_register(cr3_desc) + val = cr3.cast(gdb.lookup_type('unsigned long long')) + return val + +class PageVisualizer(gdb.Command): + """resolves a virtual adress: vaview [] """ + + def __init__(self, monitor): + super(PageVisualizer, self).__init__("vaview", gdb.COMMAND_SUPPORT) + self.monitor = monitor + pass + + def invoke(self, arg, from_tty): + args = gdb.string_to_argv(arg) + base = None + va = None + if len(args) == 1: + base = _active_cr3() + va = gdb.parse_and_eval(args[0]) + pass + elif len(args) == 2: + base = gdb.parse_and_eval(args[0]) + va = gdb.parse_and_eval(args[1]) + else: + raise gdb.GdbError("vaview [] ") + + try: + base = int(base) + if va.type.code == gdb.TYPE_CODE_FUNC: + va = int(va.address) + else: + va = int(va) + pass + + mmu = paging.MMU(self.monitor, paging.Arch.X86_64) + page, size, offset, entries = mmu.resolve(base, va) + + parts = mmu.split_addr(va) + print( + f"cr3: 0x{base:x}; vaddr: 0x{va:x} = " + f"({ '|'.join([hex(int(p)) for p in parts[0]]) })" + ) + + for e in entries: + print(e) + if page is not None and offset is not None: + print(f"0x{va:x} -> 0x{page:x}:{offset:x}") + else: + print(f"0x{va:x} -> ") + except Exception as e: + traceback.print_exc() + raise e + pass + +def _gdtidtargs(arg, kind): + args = gdb.string_to_argv(arg) + if len(args) == 0: + mapping = _active_cr3() + cpuid = current_cpuid() + regs = qemu.registers()[cpuid] + base, limit = regs[kind] + pass + elif len(args) == 2: + # base, limit + mapping = _active_cr3() + base = gdb.parse_and_eval(args[0]) + limit = gdb.parse_and_eval(args[1]) + try: + limit = int(limit) + if base.type.code == gdb.TYPE_CODE_FUNC: + base = int(base.address) + else: + base = int(base) + except Exception as e: + traceback.print_exc() + raise e + pass + elif len(args) == 3: + # mapping, cr3, limit + mapping = gdb.parse_and_eval(args[0]) + base = gdb.parse_and_eval(args[1]) + limit = gdb.parse_and_eval(args[2]) + try: + limit = int(limit) + if base.type.code == gdb.TYPE_CODE_FUNC: + base = int(base.address) + else: + base = int(base) + except Exception as e: + traceback.print_exc() + raise e + pass + else: + raise gdb.GdbError("invalid args") + mapping = int(mapping) + return mapping, base, limit + +class GDTVisualizer(gdb.Command): + """print the GDT: gdtview [[] ]""" + + def __init__(self, monitor): + super(GDTVisualizer, self).__init__("gdtview", gdb.COMMAND_SUPPORT) + self.monitor = monitor + pass + + def invoke(self, arg, from_tty): + mapping, base, limit = _gdtidtargs(arg, 'gdt') + mmu = paging.MMU(self.monitor, paging.Arch.X86_64) + gdt.GDT(mmu, mapping, base, limit).print() + +class InterruptGateVisualizer(gdb.Command): + """print the IDT: idtview [[] ]""" + + def __init__(self, monitor): + super(InterruptGateVisualizer, self).__init__( + "idtview", gdb.COMMAND_USER) + self.monitor = monitor + pass + + def invoke(self, args, from_tty): + mapping, base, limit = _gdtidtargs(args, 'idt') + mmu = paging.MMU(self.monitor, paging.Arch.X86_64) + try: + idt.IDT(mmu, mapping, base, limit).print() + except Exception as e: + traceback.print_exc() + raise e + +class CurrentThread(gdb.Function): + """Fetch the current thread `Dispatcher::life` """ + def __init__(self): + super(CurrentThread, self).__init__("current") + pass + + def invoke(self): + if not gdb.selected_thread(): + return None + + cpuid = 
current_cpuid() + + sym, field = gdb.lookup_symbol('Dispatcher::life') + instances = sym.value()['instances'] + thread_ptr = instances[cpuid]['value'] + + return thread_ptr + +def current_cpuid(): + inferior = gdb.selected_inferior() + + max_threadid = len(inferior.threads()) + assert (max_threadid > 0) + _, threadid, _ = gdb.selected_thread().ptid + assert (threadid > 0 and threadid <= max_threadid) + + cpuid = threadid - 1 + return cpuid + pass + +InterruptGateVisualizer(qemu) +PageVisualizer(qemu) +GDTVisualizer(qemu) +CurrentThread() diff --git a/tools/gdb/stubs/gdt.py b/tools/gdb/stubs/gdt.py new file mode 100644 index 0000000..95448b1 --- /dev/null +++ b/tools/gdb/stubs/gdt.py @@ -0,0 +1,209 @@ + +from . import helper + +class SegmentDescriptor(object): + pass + + def __init__(self, memview): + assert (len(memview) > 7) + self.kind = '' + + raw = int.from_bytes(memview[0:8], byteorder='little') + + self.base = helper.bits(raw, 16, 24) \ + | helper.bits(raw, 32 + 24, 8) << 24 + + self.limit = helper.bits(raw, 0, 16) \ + | helper.bits(raw, 32 + 16, 4) << 16 + + self.type = helper.bits(raw, 32 + 8, 4) + + self.g = helper.bits(raw, 32 + 23, 1) + self.p = helper.bits(raw, 32 + 15, 1) + self.l = helper.bits(raw, 32 + 21, 1) + self.dpl = helper.bits(raw, 32 + 13, 2) + pass + + def create(memview): + raw = int.from_bytes(memview[0:8], byteorder='little') + feature = helper.bits(raw, 32 + 11, 2) + if feature == 2: + return DataSegmentDescriptor(memview) + elif feature == 3: + return CodeSegmentDescriptor(memview) + else: + feature = helper.bits(raw, 32 + 8, 5) + if feature == 0x00: + return NullDescriptor(memview) + elif feature == 0x0f: + return CallGateDescriptor(memview) + else: + return SystemSegmentDescriptor.create(memview) + pass + + def __str__(self): + verbose = self.str_verbose() + return f"<{self.kind}> 0x{self.base:x}:{self.limit:x} [{verbose}]" + if verbose: + return f"<{self.kind}> 0x{self.base:x}:{self.limit:x} [{verbose}]" + else: + return f"<{self.kind}> 0x{self.base:x}:{self.limit:x}" + + def size(self): + return None + +class DataSegmentDescriptor(SegmentDescriptor): + def __init__(self, memview): + super(DataSegmentDescriptor, self).__init__(memview) + self.kind = 'data' + + self.raw = int.from_bytes(memview[0:8], byteorder='little') + + self.b = helper.bits(self.raw, 32 + 22, 1) + self.avl = helper.bits(self.raw, 32 + 20, 1) + + self.e = helper.bits(self.raw, 32 + 10, 1) + self.w = helper.bits(self.raw, 32 + 9, 1) + self.a = helper.bits(self.raw, 32 + 8, 1) + + def str_verbose(self): + msg = "|".join([ + f"g{self.g}", f"b{self.b}", f"l{self.l}", + f"avl{self.avl}", f"p{self.p}", f"dpl{self.dpl}", + f"e{self.e}", f"w{self.w}", f"a{self.a} {self.raw:x}" + ]) + if self.l: + msg = msg + '"invalid l"' + pass + return msg + + def size(self): + return 8 + +class CodeSegmentDescriptor(SegmentDescriptor): + def __init__(self, memview): + super(CodeSegmentDescriptor, self).__init__(memview) + self.kind = 'code' + + self.raw = int.from_bytes(memview[0:8], byteorder='little') + + self.d = helper.bits(self.raw, 32 + 22, 1) + self.avl = helper.bits(self.raw, 32 + 20, 1) + + self.c = helper.bits(self.raw, 32 + 10, 1) + self.r = helper.bits(self.raw, 32 + 9, 1) + self.a = helper.bits(self.raw, 32 + 8, 1) + + def size(self): + return 8 + + def str_verbose(self): + return "|".join([ + f"g{self.g}", f"d{self.d}", f"l{self.l}", + f"avl{self.avl}", f"p{self.p}", f"dpl{self.dpl}", + f"c{self.c}", f"r{self.r}", f"a{self.a}", f"{self.raw:x}" + ]) + +class 
SystemSegmentDescriptor(SegmentDescriptor): + + def __init__(self, memview): + super(SystemSegmentDescriptor, self).__init__(memview) + self.kind = 'system' + self.raw = int.from_bytes(memview[0:16], byteorder='little') + pass + + def create(memview): + raw = int.from_bytes(memview[0:8], byteorder='little') + type = helper.bits(raw, 32 + 8, 4) + masks = [(TSSDescriptor, 0x9)] + for ctor, mask in masks: + if type & mask == mask: + return ctor(memview) + pass + else: + raise Exception("no matching Descriptor") + pass + + def size(self): + return 16 + + def str_verbose(self): + return f"raw: {self.raw}" + +class TSSDescriptor(SystemSegmentDescriptor): + + def __init__(self, memview): + super(SystemSegmentDescriptor, self).__init__(memview) + self.kind = 'tss' + self.raw = int.from_bytes(memview[0:16], byteorder='little') + + self.type = helper.bits(self.raw, 32 + 8, 4) + + self.base = helper.bits(self.raw, 16, 24) \ + | helper.bits(self.raw, 32 + 24, 8) << 24 \ + | helper.bits(self.raw, 64, 32) << 32 + + self.avl = helper.bits(self.raw, 32 + 20, 1) + + self.null = helper.bits(self.raw, 3 * 32 + 8, 5) + self.reserved_a = helper.bits(self.raw, 3 * 32 + 13, 32 - 13) + self.reserved_b = helper.bits(self.raw, 3 * 32, 8) + assert (self.null == 0) + + def str_verbose(self): + b = helper.bits(self.raw, 32 + 22, 1) + e = helper.bits(self.raw, 32 + 12, 1) + msg = "|".join([ + f"rsvd: {self.reserved_a}", f"0: {self.null}", + f"rsvd: {self.reserved_b}" f"g: {self.g}", + f"0{b}", f"0{self.l}", f"avl{self.avl}", + f"p{self.p}", f"dpl{self.dpl}", f"0{e}", + f"type: {self.type:x}", f"{self.raw:x}" + ]) + return msg + +class NullDescriptor(SystemSegmentDescriptor): + def __init__(self, memview): + super(NullDescriptor, self).__init__(memview) + self.kind = 'call gate' + self.kind = 'null' + self.raw = int.from_bytes(memview[0:8], byteorder='little') + pass + + def size(self): + return 8 + + def str_verbose(self): + return f"{self.raw:x}" + +class CallGateDescriptor(SystemSegmentDescriptor): + def __init__(self, mems): + super(CallGateDescriptor, self).__init__(mems) + self.kind = 'call gate' + pass + pass + +class GDT: + def __init__(self, mmu, mapping, base, limit): + self.mmu = mmu + self.mapping = mapping + self.limit = limit + self.base = base + pass + + def print(self): + offset = 0 + print(f"base: 0x{self.base:x}, limit: 0x{self.limit:x}") + while offset < self.limit + 1: + + desc_bytes = self.mmu.linear_bytes( + self.mapping, + self.base + offset, + 16 + ) + + segment = SegmentDescriptor.create(desc_bytes) + print(f"[0x{offset:x}]: {str(segment)}") + offset += segment.size() + pass + pass diff --git a/tools/gdb/stubs/helper.py b/tools/gdb/stubs/helper.py new file mode 100644 index 0000000..3dbcc3e --- /dev/null +++ b/tools/gdb/stubs/helper.py @@ -0,0 +1,33 @@ + +def compact_entries(entries, cmp_eq): + l = [] + for idx, entry in entries: + # prepare first item in list + if len(l) == 0: + l.append((idx, idx, entry)) + continue + + # check for duplicate entry + if cmp_eq(l[-1][2], entry): + old = l.pop() + l.append((old[0], idx, old[2])) + continue + + l.append((idx, idx, entry)) + return l + +def bits(value, start, nbits): + # drop any bits positioned higher than (start + nbits) + mask = 1 << (start + nbits) # mind the overflow in any other language without bigints! 
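+    # e.g. bits(0b110100, start=2, nbits=3) == 0b101: drop everything at or
+    # above bit (start + nbits), keep the nbits-wide field, shift it to bit 0.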
+ mask = mask - 1 + value = value & mask + + # select only nbits + mask = (1 << nbits) - 1 + mask = mask << start + value = value & mask + + # shift result accordingly + value = value >> start + + return value diff --git a/tools/gdb/stubs/idt.py b/tools/gdb/stubs/idt.py new file mode 100644 index 0000000..7d2b9f7 --- /dev/null +++ b/tools/gdb/stubs/idt.py @@ -0,0 +1,68 @@ + +import gdb +from . import helper + +class IDT: + class InterruptTrapGate: + def __init__(self, raw): + self.raw = raw # 128-bit type + self.offset = helper.bits(self.raw, 0, 16) \ + | helper.bits(self.raw, 32 + 16, 16) << 16\ + | helper.bits(self.raw, 64, 32) << 32 + self.segment = helper.bits(self.raw, 16, 16) + self.ist = helper.bits(self.raw, 32, 4) + self.type = helper.bits(self.raw, 32 + 8, 4) + self.dpl = helper.bits(self.raw, 32 + 13, 2) + self.p = helper.bits(self.raw, 32 + 15, 1) + + def _get_symbol(self): + block = gdb.block_for_pc(self.offset) + if block is None: + return "?" + if block.function is None: + return "?" + return block.function + + def __str__(self): + symbol = self._get_symbol() + addr = f"0x{self.segment:x}:0x{self.offset:x}" + bits = "|".join([ + f"off:{self.offset:x}", f"p:{self.p}", + f"dpl:{self.dpl}", f"type:{self.type:x}", + f"ist:{self.ist:x}", f"ss:{self.segment:x}" + ]) + return f"{addr} <{symbol}> [{bits}] raw={self.raw:032x}" + + def __init__(self, mmu, mapping, base, limit): + self.mmu = mmu + self.mapping = mapping + self.base = base + self.limit = limit + self.desc_size = 16 + self.nentry = (limit + 1) // self.desc_size + pass + + def print(self): + entries = [] + for i in range(0, self.nentry): + offset = i * self.desc_size + desc_bytes = self.mmu.linear_bytes( + self.mapping, + self.base + offset, + self.desc_size + ) + desc = int.from_bytes(desc_bytes, 'little') + gate = self.InterruptTrapGate(desc) + entries.append((i, gate)) + + def cmp_eq(a, b): + return a.offset == b.offset and a.segment == b.segment + + compact = helper.compact_entries(entries, cmp_eq) + for start, stop, gate in compact: + if start == stop: + print(f"[{start}]:\t{str(gate)}") + else: + print(f"[{start}-{stop}]:\t{str(gate)}") + pass + pass diff --git a/tools/gdb/stubs/monitor.py b/tools/gdb/stubs/monitor.py new file mode 100644 index 0000000..9e2a310 --- /dev/null +++ b/tools/gdb/stubs/monitor.py @@ -0,0 +1,131 @@ + +import asyncio +import re +from qemu.qmp import QMPClient + +class Monitor: + def __init__(self, socket): + self.socket = socket + self.qmp = QMPClient(f'Monitor: {socket}') + pass + + async def _connect(self): + await self.qmp.connect(self.socket) + + async def _disconnect(self): + await self.qmp.disconnect() + + async def _hmc(self, cmd): + raw = await self.qmp.execute( + 'human-monitor-command', {'command-line': cmd}) + return raw + + def registers(self): + registers = dict() + + async def query_registers(): + await self._connect() + raw = await self._hmc('info registers -a') + await self._disconnect() + return raw + raw = asyncio.run(query_registers()) + + # each paragraph of `raw` contains the registers for a logical CPU + cpu_split = raw.split('\r\n\r\n') + cpu_split_stripped = (x.strip() for x in cpu_split) + cpu_split = [s for s in cpu_split_stripped if s] + + # general purpose registers + def fetch_gpr(input): + registers = dict() + gprs = ['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', + 'RBP', 'RSP', 'R8', 'R9', 'R10', 'R11', + 'R12', 'R13', 'R14', 'R15', 'RIP'] + for gpr in gprs: + pattern = rf"{gpr}\s?=(?P<{gpr}>\w{{16}})" + match = re.search(pattern, input) + value_raw = 
match.group(gpr) + value = int(value_raw, 16) + registers[gpr.lower()] = value + return registers + + # control registers + def fetch_cr(input): + registers = dict() + for cr in ['CR0', 'CR2', 'CR3', 'CR4']: + pattern = rf"{cr}=(?P<{cr}>\w{{8,16}})" + match = re.search(pattern, input) + value_raw = match.group(cr) + value = int(value_raw, 16) + registers[cr.lower()] = value + return registers + + # desriptor tables + def fetch_dt(input): + registers = dict() + for tbl in ['GDT', 'IDT']: + pattern = rf"{tbl}\s*=\s*(?P<{tbl}_base>\w{{16}})" \ + rf"\s+(?P<{tbl}_limit>\w{{8}})" + match = re.search(pattern, input) + base_raw = match.group(f"{tbl}_base") + limit_raw = match.group(f"{tbl}_limit") + base = int(base_raw, 16) + limit = int(limit_raw, 16) + registers[tbl.lower()] = (base, limit) + return registers + + registers = dict() + for cpuid, regstr in enumerate(cpu_split): + assert (regstr is not None and len(regstr) > 0) + registers[cpuid] = dict() + registers[cpuid].update(fetch_gpr(regstr)) + registers[cpuid].update(fetch_cr(regstr)) + registers[cpuid].update(fetch_dt(regstr)) + + return registers + + def virtual_memory(self, addr, size): + # byte, word, double word, giant + types = {1: 'b', 2: 'w', 4: 'd', 8: 'g'} + assert (size in types) + + async def query_virtual_memory(): + await self._connect() + res = await self._hmc(f"x/x{types[size]} {addr}") + await self._disconnect() + return res + + res = asyncio.run(query_virtual_memory()) + match = re.match(r"[a-f\d]+:\s*(0x[a-f\d]+)", res) + assert (match) + return int(match.group(1), 16) + + def physical_memory(self, addr, size): + # byte, word, double word, giant + types = {1: 'b', 2: 'w', 4: 'd', 8: 'g'} + assert (size in types) + + async def query_physical_memory(): + await self._connect() + res = await self._hmc(f"xp/x{types[size]} {addr}") + await self._disconnect() + return res + + res = asyncio.run(query_physical_memory()) + match = re.match(r"[a-f\d]+:\s*(0x[a-f\d]+)", res) + assert (match) + return int(match.group(1), 16) + + def gva2gpa(self, addr): + async def query_gva2gpa(): + await self._connect() + res = await self._hmc(f"gva2gpa {addr}") + await self._disconnect() + return res + + res = asyncio.run(query_gva2gpa()) + if res == 'Unmapped\r\n': + return None + match = re.match(r"gpa:\s*0x([\da-f]+)", res) + assert (match) + return int(match.group(1), 16) diff --git a/tools/gdb/stubs/paging.py b/tools/gdb/stubs/paging.py new file mode 100644 index 0000000..d78c00e --- /dev/null +++ b/tools/gdb/stubs/paging.py @@ -0,0 +1,333 @@ +from enum import Enum +from . 
import helper + +class Arch(Enum): + X86 = 0 + X86_64 = 1 + +class PageLevel(Enum): + PML4 = 0 + DirectoryPtr = 1 + Directory = 2 + Table = 3 + +class PageTableEntry: + def __init__(self, base, idx, mmu, value): + self.base = base + self.idx = idx + self.raw = value + self.faulty = False + + self.present = helper.bits(value, 0, 1) + self.rw = helper.bits(value, 1, 1) + self.us = helper.bits(value, 2, 1) + self.pwt = helper.bits(value, 3, 1) + self.pcd = helper.bits(value, 4, 1) + self.a = helper.bits(value, 5, 1) + self.ps = helper.bits(value, 7, 1) + + self.r = helper.bits(value, 11, 1) + self.xd = helper.bits(value, 63, 1) + self.reference = helper.bits(value, 12, mmu.M) + self.rsvd = helper.bits(value, mmu.M, 64 - mmu.M) + pass + + def addr(self): + return self.reference << 12 + + def name(self): + raise Exception("must be implemented by inheriting class") + + def description(self): + raise Exception("must be implemented by inheriting class") + + def __str__(self): + return f"<{self.name()}> {self.description()}" + +class PML4Entry(PageTableEntry): + def __init__(self, base, idx, mmu, value): + super().__init__(base, idx, mmu, value) + if (self.ps != 0): + self.faulty = True + pass + + def name(self): + return f"PML4 (0x{self.base:x}[0x{self.idx:x}])" + + def description(self): + if not self.present: + return f"[p:0|raw: {self.raw:x}]" + return "[" + str.join('|', [ + "p:1", f"rw:{self.rw}", f"us:{self.us}", f"pwt:{self.pwt}", + f"pcd:{self.pcd}", f"a:{self.a}", f"rsvd:{self.ps}", + f"r:{self.r}", f"addr:0x{self.reference:x}", + f"rsvd:{self.rsvd}" + ]) + f"] = {self.raw:x}" + pass + +class PDPEntry(PageTableEntry): + + def __init__(self, base, idx, mmu, value): + super().__init__(base, idx, mmu, value) + self.d = helper.bits(value, 6, 1) + self.g = helper.bits(value, 8, 1) + self.pat = helper.bits(value, 12, 1) + self.xd = helper.bits(value, 63, 1) + if self.ps == 1: + self.reference = helper.bits(value, 30, mmu.M - (30 - 12)) + + def name(self): + return f"PDP (0x{self.base:x}[0x{self.idx:x}])" + + def addr(self): + if self.ps == 1: + return self.reference << (12 + 18) + return self.reference << 12 + + def description(self): + if not self.present: + return f"[p:0|raw: {self.raw:x}]" + return "[" + str.join('|', [ + "p:1", f"rw:{self.rw}", f"us:{self.us}", f"pwt:{self.pwt}", + f"pcd:{self.pcd}", f"a:{self.a}", f"d:{self.d}", f"ps:{self.ps}", + f"g:{self.g}", f"r:{self.r}", f"addr:0x{self.reference:x}", + f"rsvd:{self.rsvd}" + ]) + f"] = {self.raw:x}" + pass + +class PDEntry(PageTableEntry): + + def __init__(self, base, idx, mmu, value): + super().__init__(base, idx, mmu, value) + self.d = helper.bits(value, 6, 1) + self.g = helper.bits(value, 8, 1) + self.pat = helper.bits(value, 12, 1) + self.xd = helper.bits(value, 63, 1) + if self.ps == 1: + self.reference = helper.bits(value, 21, mmu.M - (21 - 12)) + + def addr(self): + if self.ps == 1: + return self.reference << (12 + 9) + return self.reference << 12 + + def name(self): + return f"PD (0x{self.base:x}[0x{self.idx:x}])" + + def description(self): + if not self.present: + return f"[p:0|raw: {self.raw:x}]" + desc = [ + "p:1", f"rw:{self.rw}", f"us:{self.us}", f"pwt:{self.pwt}", + f"pcd:{self.pcd}", f"a:{self.a}", f"d:{self.d}", f"ps:{self.ps}", + f"g:{self.g}", f"r:{self.r}", f"r:{self.r}" + ] + if self.ps == 1: + desc.append(f"pat:{self.pat}") + desc = desc + [ + f"addr:0x{self.reference:x}", f"rsvd:{self.rsvd}", f"xd:{self.xd}" + ] + return "[" + str.join('|', desc) + f"] = {self.raw:x}" + pass + +class PTEntry(PageTableEntry): + + 
def __init__(self, base, idx, mmu, value): + super().__init__(base, idx, mmu, value) + self.d = helper.bits(value, 6, 1) + self.g = helper.bits(value, 8, 1) + self.xd = helper.bits(value, 63, 1) + + def name(self): + return f"PT (0x{self.base:x}[0x{self.idx:x}])" + + def description(self): + if not self.present: + return f"[p:0|raw: {self.raw:x}]" + desc = [ + "p:1", f"rw:{self.rw}", f"us:{self.us}", f"pwt:{self.pwt}", + f"pcd:{self.pcd}", f"a:{self.a}", f"d:{self.d}", f"ps:{self.ps}", + f"g:{self.g}", f"r:{self.r}", f"r:{self.r}", + f"addr:0x{self.reference:x}", f"rsvd:{self.rsvd}", f"xd:{self.xd}" + ] + return "[" + str.join('|', desc) + f"] = {self.raw:x}" + +class FourLevelPagingTable: + entries = 512 + pass + + def __init__(self, mmu, base): + self.mmu = mmu + self.base = base + self.descriptor_size = 8 + + def _create(self, base, idx, mmu, val): + pass + + def __iter__(self): + table = self + + class TableIterator: + def __init__(self): + self.idx = 0 + + def __next__(self): + if self.idx < FourLevelPagingTable.entries: + idx = self.idx + e = table.entry(self.idx) + self.idx = self.idx + 1 + return idx, e + raise StopIteration + return TableIterator() + + def name(self): + return f"table_{hex(self.base)}" + + def entry(self, idx): + offset = idx * self.descriptor_size + val = self.mmu.monitor.physical_memory( + self.base + offset, self.descriptor_size) + return self._create(self.base, idx, self.mmu, val) + pass + +class PML4Table(FourLevelPagingTable): + + def _create(self, base, idx, mmu, val): + return PML4Entry(base, idx, self.mmu, val) + pass + +class PageDirectoryPointerTable(FourLevelPagingTable): + + def _create(self, base, idx, mmu, val): + return PDPEntry(base, idx, self.mmu, val) + pass + +class PageDirectory(FourLevelPagingTable): + + def _create(self, base, idx, mmu, val): + return PDEntry(base, idx, self.mmu, val) + pass + pass + +class PageTable(FourLevelPagingTable): + + def _create(self, base, idx, mmu, val): + return PTEntry(base, idx, self.mmu, val) + pass + pass + +class MMU: + def __init__(self, monitor, arch): + self.monitor = monitor + if arch == Arch.X86: + self.bits = 32 + self.M = 20 + pass + else: + self.bits = 64 + self.M = 48 # todo check + pass + assert (self.bits == 64) # TODO x86 + pass + + def assert_width(self, addr): + assert ((1 << self.bits) > addr) + pass + + def split_addr(self, addr: int) -> list[int]: + parts = [] + # 4Ki pages + parts.append([ + (addr >> 39) & 0x1ff, + (addr >> 30) & 0x1ff, + (addr >> 21) & 0x1ff, + (addr >> 12) & 0x1ff, + addr & 0xfff + ]) + # 2Mi pages + parts.append([ + (addr >> 39) & 0x1ff, + (addr >> 30) & 0x1ff, + (addr >> 21) & 0x1ff, + addr & 0x1fffff + ]) + + # 1Gi pages + parts.append([ + (addr >> 39) & 0x1ff, + (addr >> 30) & 0x1ff, + addr & 0x3fffffff + ]) + return parts + + def resolve(self, base: int, addr: int): + + self.assert_width(addr) + + entries = list() + parts = self.split_addr(addr) + + pml4_tbl = PML4Table(self, base) + pml4_idx = parts[0][0] + pml4_entry = pml4_tbl.entry(pml4_idx) + entries.append(pml4_entry) + + if not pml4_entry.present: + return (None, None, None, entries) + + pdp_tbl = PageDirectoryPointerTable(self, pml4_entry.addr()) + pdp_idx = parts[0][1] + pdp_entry = pdp_tbl.entry(pdp_idx) + entries.append(pdp_entry) + + if not pdp_entry.present: + return (None, None, None, entries) + + if (pdp_entry.ps): + return pdp_entry.addr(), (4096 << 18), parts[2][2], entries + + pd_tbl = PageDirectory(self, pdp_entry.addr()) + pd_idx = parts[0][2] + pd_entry = pd_tbl.entry(pd_idx) + 
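+        # Level 2 (page directory): if the PS bit is set in this entry, it maps a
+        # 2 MiB huge page and the walk ends here; otherwise it references a page table.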
+        entries.append(pd_entry)
+
+        if not pd_entry.present:
+            return (None, None, None, entries)
+
+        if (pd_entry.ps):
+            return pd_entry.addr(), (4096 << 9), parts[1][3], entries
+
+        pt_tbl = PageTable(self, pd_entry.addr())
+        pt_idx = parts[0][3]
+        pt_entry = pt_tbl.entry(pt_idx)
+        entries.append(pt_entry)
+
+        if not pt_entry.present:
+            return (None, None, None, entries)
+
+        physical = pt_entry.addr()
+        return physical, 4096, parts[0][4], entries
+
+    def linear_bytes(self, mapping, addr, len):
+        chunks = []
+        while len > 0:
+            page, size, offset, _ = self.resolve(mapping, addr)
+            # read larger 8 byte blocks
+            if offset % 8 == 0:
+                while offset + 8 < size and len >= 8:
+                    b = self.monitor.physical_memory(page + offset, 8)
+                    chunks.append(int.to_bytes(b, 8, 'little'))
+                    len = len - 8
+                    offset = offset + 8
+                    pass
+
+            # read single bytes
+            while offset < size and len > 0:
+                b = self.monitor.physical_memory(page + offset, 1)
+                chunks.append(int.to_bytes(b, 1, 'little'))
+                len = len - 1
+                offset = offset + 1
+                pass
+            # continue with the virtual address of the next page
+            addr = (addr & ~(size - 1)) + size
+            pass
+        return bytes().join(chunks)
diff --git a/tools/image.mk b/tools/image.mk
new file mode 100644
index 0000000..6286274
--- /dev/null
+++ b/tools/image.mk
@@ -0,0 +1,101 @@
+# Generates a bootable ISO image that can be transferred to external media, such as CDs or USB sticks.
+# This will install, in addition to your kernel, the bootloader GRUB (https://www.gnu.org/software/grub/).
+#
+# The target 'qemu-iso' is used to test the image generated with 'iso'.
+#
+# Assuming that a USB mass-storage device is connected as, for instance, /dev/sdc, the target 'usb-sdc'
+# can be used to make your device bootable (requires root access, substitute sdc with the matching device).
+# Alternatively, you can burn the .iso file directly to CD.
+
+DD = dd
+XORRISO = xorriso
+MKISO = grub-mkrescue
+
+ISODIR = $(BUILDDIR)-iso
+ISOGRUBCFG = boot/grub/grub.cfg
+ISOKERNEL = boot/kernel
+ISOINITRD = initrd
+GRUBTITLE = $(shell id -un)s $(PROJECT)
+GRUBTIMEOUT = 2
+GRUBBIN = /usr/lib/grub/i386-pc
+
+# Default ISO target
+iso: $(ISOFILE)
+
+# Create Grub config
+$(ISODIR)/$(ISOGRUBCFG):
+	@echo "GEN $@"
+	@mkdir -p $(dir $@)
+	@/bin/echo -e "set timeout=$(GRUBTIMEOUT)\nset default=0\n\nmenuentry \"$(GRUBTITLE)\" {\n\tmultiboot /$(ISOKERNEL)\n\tmodule /$(ISOINITRD)\n\tboot\n}" > $@
+
+# Strip debug symbols from kernel binary
+$(ISODIR)/$(ISOKERNEL): all
+	@echo "STRIP $@"
+	@mkdir -p $(dir $@)
+	$(VERBOSE) $(STRIP) --strip-debug --strip-unneeded -p -o $@ $(KERNEL)
+
+# Copy initial ramdisk
+$(ISODIR)/$(ISOINITRD): all
+	@echo "CPY $@"
+	@mkdir -p $(dir $@)
+	@if [ -s $(INITRD) ] ; then cp -a $(INITRD) $@ ; else touch $@ ; fi
+
+# Pack to ISO
+$(ISOFILE): $(ISODIR)/$(ISOKERNEL) $(ISODIR)/$(ISOINITRD) $(ISODIR)/$(ISOGRUBCFG)
+	@echo "ISO $@"
+	@which $(XORRISO) >/dev/null || echo "Xorriso cannot be found - if building the ISO fails, this may be the reason!" >&2
+	$(VERBOSE) $(MKISO) -d $(GRUBBIN) -o $@ $(ISODIR)
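+
+# Example usage:
+#   make iso         # build $(ISOFILE)
+#   make qemu-iso    # boot the freshly built image in QEMU
+#   make usb-sdc     # write it to /dev/sdc (requires root)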
+
+# Run ISO in Qemu
+qemu-iso: $(ISOFILE)
+	$(QEMU) -cdrom $< -smp $(QEMUCPUS) $(QEMUFLAGS)
+
+# Run ISO in KVM
+kvm-iso: $(ISOFILE)
+	$(QEMU) -cdrom $< -smp $(QEMUCPUS) $(KVMFLAGS)
+
+# Copy ISO to USB device
+usb: $(ISOFILE)
+ifeq (,$(USBDEV))
+	@echo "The environment variable USBDEV must contain the path to the USB mass-storage device:" >&2
+	@lsblk -o TYPE,KNAME,SIZE,MODEL -a -p | grep "^disk" | cut -b 6-
+	@exit 1
+else
+	$(VERBOSE) $(DD) if=$< of=$(USBDEV) bs=4M status=progress && sync
+endif
+
+# Shorthand to copy ISO to a specific USB device
+usb-%:
+	@$(MAKE) USBDEV=/dev/$* usb
+
+# Burn ISO to CD
+cd: $(ISOFILE)
+ifeq (,$(CDRWDEV))
+	@echo "The environment variable CDRWDEV must contain the path to the CD/DVD writer" >&2
+	@exit 1
+else
+	$(VERBOSE) $(XORRISO) -as cdrecord -v dev=$(CDRWDEV) -dao $<
+endif
+
+# Shorthand to burn ISO to a specific CD device
+cd-%:
+	@$(MAKE) CDRWDEV=/dev/$* cd
+
+# The standard target 'clean' removes the whole generated system, the object files, and the dependency files.
clean::
+	@echo "RM $(ISODIR)"
+	$(VERBOSE) rm -rf "$(ISODIR)" "$(ISODIR)$(OPTTAG)" "$(ISODIR)$(NOOPTTAG)" "$(ISODIR)$(DBGTAG)" "$(ISODIR)$(VERBOSETAG)"
+
+# Documentation
+help::
+	@/bin/echo -e "" \
+		" \e[3miso\e[0m       Generates a bootable system image (File: $(ISOFILE))\n\n" \
+		" \e[3mqemu-iso\e[0m  Starts the system in QEMU by booting from the virtual CD drive\n\n" \
+		" \e[3mkvm-iso\e[0m   Same as \e[3mqemu-iso\e[0m, but with hardware acceleration\n\n" \
+		" \e[3musb\e[0m       Generates a bootable USB mass-storage device; the environment\n" \
+		"           variable \e[4mUSBDEV\e[0m should point to the USB device\n\n" \
+		" \e[3mcd\e[0m        Generates a bootable CD; the environment variable \e[4mCDRWDEV\e[0m\n" \
+		"           should point to the CD writer\n\n"
+
+# Phony targets
+.PHONY: iso qemu-iso kvm-iso cd usb help
diff --git a/tools/linter.mk b/tools/linter.mk
new file mode 100644
index 0000000..1b1392b
--- /dev/null
+++ b/tools/linter.mk
@@ -0,0 +1,30 @@
+# Perform static code checks
+
+TIDY ?= clang-tidy
+CPPLINT ?= /usr/bin/env python3 "$(CURRENT_DIR)/cpplint.py"
+
+# Check sources with Clang Tidy
+tidy::
+ifeq (,$(CC_SOURCES))
+	@echo "(nothing to tidy)"
+else
+	$(VERBOSE) $(TIDY) --format-style=google -header-filter=.* -warnings-as-errors="readability*" -checks="readability*,google-readability-casting,google-explicit-constructor,bugprone*,-bugprone-easily-swappable-parameters,-bugprone-implicit-widening-of-multiplication-result,-bugprone-narrowing-conversions,-bugprone-reserved-identifier,-readability-else-after-return,-readability-identifier-length,-readability-magic-numbers,-readability-use-anyofallof,-readability-function-cognitive-complexity" $(filter-out utils/png.cc,$(CC_SOURCES)) -- $(CXXFLAGS_ARCH) $(CXXFLAGS_DEFAULT) $(CXXFLAGS_OPT)
+endif
+
+# Check sources with cpplint
+lint::
+	@if $(CPPLINT) --quiet --recursive . ; then \
+		echo "Congratulations, coding style obeyed!" ; \
+	else \
+		echo "Coding style violated -- see CPPLINT.cfg for details" ; \
+		exit 1 ; \
+	fi
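+
+# Example usage:
+#   make lint    # coding-style check via cpplint (rules in CPPLINT.cfg)
+#   make tidy    # static analysis via clang-tidy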
+
+# Documentation
+help::
+	@/bin/echo -e "" \
+		" \e[3mlint\e[0m    Checks the coding style using \e[4mCPPLINT\e[0m\n\n" \
+		" \e[3mtidy\e[0m    Uses \e[4mClang Tidy\e[0m for a static code analysis\n\n"
+
+# Phony targets
+.PHONY: tidy lint help
diff --git a/tools/qemu.mk b/tools/qemu.mk
new file mode 100644
index 0000000..12c12b6
--- /dev/null
+++ b/tools/qemu.mk
@@ -0,0 +1,65 @@
+# Targets for running and debugging in Qemu/KVM
+
+QEMUCPUS ?= 4
+INITRD ?= /dev/null
+QEMUSERIAL ?= pty
+QEMUFLAGS = -k en-us -serial $(QEMUSERIAL) -d guest_errors -m 2048
+# According to qemu(1): "Creates a backend using PulseAudio. This backend is
+# available on most systems." So we use pa as audiodev.
+QEMUFLAGS += -audiodev pa,id=stubsad -machine pcspk-audiodev=stubsad
+# Switch to curses if no graphical output is available
+ifeq ($(DISPLAY),)
+  QEMUFLAGS += -display curses
+endif
+KVMFLAGS = -enable-kvm -cpu host $(QEMUFLAGS)
+DBGFLAGS = -no-shutdown -no-reboot -qmp unix:qmp.sock,server=on,wait=off -monitor vc
+DBGKERNEL ?= $(KERNEL64)
+DBGARCH ?= i386:x86-64
+QEMU ?= qemu-system-x86_64
+QEMUKERNEL := -kernel $(KERNEL) -initrd $(INITRD)
+GDB = $(PREFIX)gdb
+GDBFLAG = --eval-command="source tools/gdb/stubs.py"
+
+# Run the kernel in Qemu
+qemu: all
+	$(QEMU) $(QEMUKERNEL) -smp $(QEMUCPUS) $(QEMUFLAGS)
+
+# Execute Qemu with activated GDB stub and directly connect GDB to the spawned Qemu.
+qemu-gdb: all
+	$(GDB) $(GDBFLAG) $(DBGKERNEL) \
+		-ex "set arch $(DBGARCH)" \
+		-ex "target remote | exec $(QEMU) -gdb stdio $(QEMUKERNEL) -smp $(QEMUCPUS) -S $(QEMUFLAGS) $(DBGFLAGS)"
+
+qemu-gdb-tmux: all
+	tmux new-session -s StuBS -n QEMU bash -c "tmux new-window -n gdb $(GDB) $(DBGKERNEL) -ex 'set arch $(DBGARCH)' -ex 'target remote localhost:1234'; $(QEMU) -s -S $(QEMUKERNEL) -initrd /dev/null -smp $(QEMUCPUS) $(QEMUFLAGS) -display curses $(DBGFLAGS)"
+
+# Runs StuBS in Qemu with hardware acceleration (KVM support) enabled.
+# The started emulator provides several virtual CPUs that execute in parallel.
+kvm: all
+	$(QEMU) $(QEMUKERNEL) -smp $(QEMUCPUS) $(KVMFLAGS)
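+
+# The variables above can be overridden on the command line, for instance:
+#   make qemu QEMUCPUS=1           # boot with a single virtual CPU
+#   make qemu QEMUSERIAL=stdio     # attach the emulated serial port to the terminal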
+
+# Execute Qemu with KVM support and activated GDB stub
+# and directly connect GDB to the spawned Qemu.
+# Please note: Software breakpoints may not work before the stubs kernel
+# has switched to long mode -- so we use a hardware breakpoint to stop
+# at `kernel_init` (the C++ entry point)
+kvm-gdb: all
+	$(GDB) $(GDBFLAG) $(DBGKERNEL) \
+		-ex "set arch $(DBGARCH)" \
+		-ex "target remote | exec $(QEMU) -gdb stdio $(QEMUKERNEL) -smp $(QEMUCPUS) -S $(KVMFLAGS) $(DBGFLAGS)" \
+		-ex "hbreak kernel_init" \
+		-ex "continue"
+
+# Help for Qemu targets
+help::
+	@/bin/echo -e "" \
+		" \e[3mqemu\e[0m      Starts $(PROJECT) in QEMU\n" \
+		"           Due to the internal design of QEMU, some things (especially\n" \
+		"           race conditions) might behave differently compared to hardware!\n\n" \
+		" \e[3mqemu-gdb\e[0m  Starts $(PROJECT) in QEMU with internal GDB stub and attaches\n" \
+		"           it to a GDB session allowing step-by-step debugging\n\n" \
+		" \e[3mkvm\e[0m       Starts $(PROJECT) in KVM, a hardware-accelerated virtual machine\n\n" \
+		" \e[3mkvm-gdb\e[0m   Same as \e[3mqemu-gdb\e[0m, but with hardware acceleration\n\n"
+
+# Phony targets
+.PHONY: qemu kvm qemu-gdb kvm-gdb help
diff --git a/tools/remote.mk b/tools/remote.mk
new file mode 100644
index 0000000..7044b56
--- /dev/null
+++ b/tools/remote.mk
@@ -0,0 +1,91 @@
+# Use the sys infrastructure
+
+SOLUTIONDIR = /fs/stubs/solutions
+
+SOLUTIONPREFIX = musterloesung-m
+
+SOLUTIONTMPDIR = .solution
+
+NETBOOTDIR = /fs/stubs/students
+# Will use either the username from the ssh configuration or, by default, `whoami`@$(NETBOOTSSH)
+NETBOOTSSH ?= mars.cs.tu-dortmund.de
+USERNAME ?= $(USER)
+
+##HALLOFFAMESRC = /proj/i4stubs/halloffame/halloffame.iso
+#HALLOFFAMEISO = $(SOLUTIONTMPDIR)/halloffame.iso
+
+# Fetch an ISO including all hall of fame images
+#$(HALLOFFAMEISO):
+#	$(VERBOSE) echo "Get Hall-Of-Fame ISO" ; \
+#	mkdir -p $(SOLUTIONTMPDIR) ; \
+#	if [ -f "$(HALLOFFAMESRC)" ] ; then \
+#		cp $(HALLOFFAMESRC) $@ ; \
+#	else \
+#		echo "via SSH $(USERNAME)@$(NETBOOTSSH)" ; \
+#		scp $(USERNAME)@$(NETBOOTSSH):$(HALLOFFAMESRC) $@ ; \
+#	fi
+
+# 'halloffame' starts an ISO including all hall of fame images
+# with 4 cores and KVM virtualization
+#halloffame: $(HALLOFFAMEISO)
+#	$(QEMU) -cdrom $< -smp $(QEMUCPUS) $(KVMFLAGS) ; \
+
+# 'halloffame-old' starts an ISO including all hall of fame images
+# in compatibility mode (single core, software emulation)
+#halloffame-old: $(HALLOFFAMEISO)
+#	$(QEMU) -cdrom $< ; \
+#
+
+# The target 'netboot' copies the resulting StuBS kernel to the base directory
+# of our tftp server. The tftp server enables the test systems to boot the image
+# via pxelinux.
+netboot: all
+	$(VERBOSE) initrd="$(INITRD)" ; \
+	if [ ! -s "$$initrd" ] ; then \
+		initrd="$(BUILDDIR)/fake-initrd" ; \
+		echo "(none)" > "$$initrd" ; \
+	fi ; \
+	if [ -d "$(NETBOOTDIR)" ] ; then \
+		echo "CPY $(NETBOOTDIR)/$(USERNAME)/kernel" ; \
+		install -m 644 $(KERNEL) $(NETBOOTDIR)/$(USERNAME)/kernel ; \
+		echo "CPY $(NETBOOTDIR)/$(USERNAME)/initrd.img" ; \
+		install -m 644 "$$initrd" $(NETBOOTDIR)/$(USERNAME)/initrd.img ; \
+	else \
+		echo "via SSH $(USERNAME)@$(NETBOOTSSH)" ; \
+		echo "SSH $(USERNAME)@$(NETBOOTSSH)@$(NETBOOTDIR)/$(USERNAME)/kernel" ; \
+		echo "SSH $(USERNAME)@$(NETBOOTSSH)@$(NETBOOTDIR)/$(USERNAME)/initrd.img" ; \
+		tar --mode="u=rw,og=r" --transform='flags=r;s|$(KERNEL)|kernel|' --transform="flags=r;s|$$initrd|initrd.img|" -cz $(KERNEL) "$$initrd" | \
+		ssh "$(USERNAME)@$(NETBOOTSSH)" "cat - | tar -xvzp -C $(NETBOOTDIR)/\`id -run\`/" ; \
+	fi
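+
+# Example: deploy under a different remote username (defaults to $(USER)), e.g.
+#   make netboot USERNAME=jdoe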
+
+# 'solution-1' starts our solution for exercise 1 using KVM.
+# Of course, other exercise numbers can be supplied, too.
+solution-%:
+	$(VERBOSE) echo "Solution for $(PROJECT) assignment $*" ; \
+	if [ -d "$(SOLUTIONDIR)" ] ; then \
+		$(QEMU) -kernel $(SOLUTIONDIR)/$(SOLUTIONPREFIX)$*.elf -initrd $(SOLUTIONDIR)/$(SOLUTIONPREFIX)$*.rd -smp $(QEMUCPUS) $(KVMFLAGS) ; \
+	else \
+		echo "via SSH $(USERNAME)@$(NETBOOTSSH)" ; \
+		mkdir -p $(SOLUTIONTMPDIR) ; \
+		bash -c "scp $(USERNAME)@$(NETBOOTSSH):$(SOLUTIONDIR)/$(SOLUTIONPREFIX)$*.{elf,rd} $(SOLUTIONTMPDIR)" && \
+		$(QEMU) -kernel $(SOLUTIONTMPDIR)/$(SOLUTIONPREFIX)$*.elf -initrd $(SOLUTIONTMPDIR)/$(SOLUTIONPREFIX)$*.rd -smp $(QEMUCPUS) $(KVMFLAGS) ; \
+	fi
+
+# The standard target 'clean' removes the temporary solution directory
+clean::
+	@echo "RM $(SOLUTIONTMPDIR)"
+	$(VERBOSE) rm -rf "$(SOLUTIONTMPDIR)"
+
+# Documentation
+help::
+	@/bin/echo -e "" \
+		" \e[3mnetboot\e[0m  Copies $(PROJECT) to the network share, allowing the test systems\n" \
+		"          to boot your system. Define the shell variable USERNAME to use a specific user.\n" \
+		"          This is useful if your local username is different from the one on mars.\n\n\n" \
+		"Apart from the above targets that run your implementation, our solution can\n" \
+		"be run in KVM (at least when called in the SysLab or IRB pool) using the target\n\n" \
+		"    \e[3msolution-\e[2;3mexercise\e[0m\n\n" \
+		"where \e[2;3mexercise\e[0m is the number of the exercise whose solution should be executed.\n\n"
+
+# Phony targets
+.PHONY: netboot help
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..e7699e1
--- /dev/null
+++ b/types.h
@@ -0,0 +1,65 @@
+/*! \file
+ *  \brief Definition of standard integer types with specified widths and their limits
+ */
+
+#pragma once
+
+// Standard Integer Types
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef unsigned long long uintptr_t;
+
+typedef __SIZE_TYPE__ size_t;
+
+typedef signed char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef long long intptr_t;
+
+typedef long int ssize_t;
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+// validate typedef size
+static_assert(sizeof(int8_t) == (1), "Wrong size for 'int8_t'");
+static_assert(sizeof(int16_t) == (2), "Wrong size for 'int16_t'");
+static_assert(sizeof(int32_t) == (4), "Wrong size for 'int32_t'");
+static_assert(sizeof(int64_t) == (8), "Wrong size for 'int64_t'");
+static_assert(sizeof(intptr_t) == sizeof(void*), "Wrong size for 'intptr_t'");
+static_assert(sizeof(uint8_t) == (1), "Wrong size for 'uint8_t'");
+static_assert(sizeof(uint16_t) == (2), "Wrong size for 'uint16_t'");
+static_assert(sizeof(uint32_t) == (4), "Wrong size for 'uint32_t'");
+static_assert(sizeof(uint64_t) == (8), "Wrong size for 'uint64_t'");
+static_assert(sizeof(uintptr_t) == sizeof(void*), "Wrong size for 'uintptr_t'");
+
+#ifndef NULL
+#define NULL ((uintptr_t)0)
+#endif
+
+// Limits
+#define INT8_MIN (-__INT8_MAX__-1)
+#define INT8_MAX (__INT8_MAX__)
+#define INT16_MIN (-__INT16_MAX__-1)
+#define INT16_MAX (__INT16_MAX__)
+#define INT32_MIN (-__INT32_MAX__-1)
+#define INT32_MAX (__INT32_MAX__)
+#define INT64_MIN (-__INT64_MAX__-1)
+#define INT64_MAX (__INT64_MAX__)
+#define INTPTR_MIN (-__INTPTR_MAX__-1)
+#define INTPTR_MAX (__INTPTR_MAX__)
+
+#define UINT8_MAX (__UINT8_MAX__)
+#define UINT16_MAX (__UINT16_MAX__)
+#define UINT32_MAX (__UINT32_MAX__)
+#define UINT64_MAX (__UINT64_MAX__)
+#define UINTPTR_MAX (__UINTPTR_MAX__)
+
+#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)
+#define PTRDIFF_MAX (__PTRDIFF_MAX__)
+
+#define SIZE_MAX (__SIZE_MAX__)
+#define SSIZE_MIN (-__INT64_MAX__-1)
+#define SSIZE_MAX (__INT64_MAX__)
diff --git a/user/app1/appl.cc b/user/app1/appl.cc
new file mode 100644
index 0000000..8918858
--- /dev/null
+++ b/user/app1/appl.cc
@@ -0,0 +1,5 @@
+#include "user/app1/appl.h"
+
+void Application::action() {  //NOLINT
+
+}
diff --git a/user/app1/appl.h b/user/app1/appl.h
new file mode 100644
index 0000000..6256f97
--- /dev/null
+++ b/user/app1/appl.h
@@ -0,0 +1,20 @@
+#pragma once
+
+/*! \brief Test application
+ *
+ *
+ */
+class Application {
+	// Prevent copies and assignments
+	Application(const Application&) = delete;
+	Application& operator=(const Application&) = delete;
+
+ public:
+	/*! \brief Constructor
+	 */
+
+	/*! \brief Contains the application code.
+	 *
+	 */
+	void action();
+};
diff --git a/user/app2/kappl.cc b/user/app2/kappl.cc
new file mode 100644
index 0000000..520a549
--- /dev/null
+++ b/user/app2/kappl.cc
@@ -0,0 +1,4 @@
+#include "user/app2/kappl.h"
+
+void KeyboardApplication::action() {  //NOLINT
+}
diff --git a/user/app2/kappl.h b/user/app2/kappl.h
new file mode 100644
index 0000000..e44f776
--- /dev/null
+++ b/user/app2/kappl.h
@@ -0,0 +1,22 @@
+/*! \file
+ *  \brief \ref KeyboardApplication to test the input
+ */
+
+#pragma once
+
+/*! \brief Keyboard Application
+ */
+class KeyboardApplication {
+	// Prevent copies and assignments
+	KeyboardApplication(const KeyboardApplication&) = delete;
+	KeyboardApplication& operator=(const KeyboardApplication&) = delete;
+
+ public:
+	/*! \brief Constructor
+	 */
+
+	/*! \brief Contains the application code.
+	 *
+	 */
+	void action();
+};
diff --git a/utils/math.h b/utils/math.h
new file mode 100644
index 0000000..06f8eed
--- /dev/null
+++ b/utils/math.h
@@ -0,0 +1,26 @@
+/*! \file
+ *  \brief General purpose \ref Math "math functions"
+ */
+
+#pragma once
+
+#include "types.h"
+
+/*! \brief Basic math helper functions
+ */
+namespace Math {
+	template <typename T>
+	T abs(T a) {
+		return (a >= 0 ? a : -a);
+	}
+
+	template <typename T>
+	T min(T a, T b) {
+		return a > b ? b : a;
+	}
+
+	template <typename T>
+	T max(T a, T b) {
+		return a > b ? a : b;
+	}
+}  // namespace Math
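+
+// Usage sketch (illustrative):
+//   int d  = Math::abs(x - y);   // distance between two ints
+//   int lo = Math::min(x, y);
+//   int hi = Math::max(x, y);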
diff --git a/utils/size.h b/utils/size.h
new file mode 100644
index 0000000..8a86794
--- /dev/null
+++ b/utils/size.h
@@ -0,0 +1,17 @@
+/*! \file
+ *  \brief Template function to determine the length of an array
+ */
+
+#pragma once
+
+#include "types.h"
+
+/* \brief Helper to retrieve the number of elements in an array
+ * (Warning: template magic)
+ * \param Array
+ * \return Number of elements
+ */
+template <typename T, size_t N>
+constexpr size_t size(T (&/*unused*/)[N]) {
+	return N;
+}
diff --git a/utils/string.cc b/utils/string.cc
new file mode 100644
index 0000000..319250c
--- /dev/null
+++ b/utils/string.cc
@@ -0,0 +1,144 @@
+#include "string.h"
+#include "types.h"
+
+extern "C" char *strchrnul(const char *s, int c) {
+	if (s != nullptr) {
+		while (*s != '\0') {
+			if (*s == c) {
+				break;
+			}
+			s++;
+		}
+	}
+	return const_cast<char *>(s);
+}
+
+extern "C" char *strchr(const char *s, int c) {
+	if (s != nullptr) {
+		s = strchrnul(s, c);
+		if (*s == c) {
+			return const_cast<char *>(s);
+		}
+	}
+	return nullptr;
+}
+
+extern "C" int strcmp(const char *s1, const char *s2) {
+	if (s1 == nullptr || s2 == nullptr) {
+		return 0;
+	}
+
+	while (*s1 == *s2++) {
+		if (*s1++ == '\0') {
+			return 0;
+		}
+	}
+	return static_cast<int>(*s1) - static_cast<int>(*(s2 - 1));
+}
+
+extern "C" int strncmp(const char *s1, const char *s2, size_t n) {
+	if (s1 != nullptr && s2 != nullptr) {
+		for (size_t i = 0; i < n; i++) {
+			if (s1[i] != s2[i]) {
+				return static_cast<int>(s1[i]) - static_cast<int>(s2[i]);
+			} else if (s1[i] == '\0') {
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+extern "C" size_t strlen(const char *s) {
+	size_t len = 0;
+	if (s != nullptr) {
+		while (*s++ != '\0') {
+			len++;
+		}
+	}
+
+	return len;
+}
+
+extern "C" size_t strnlen(const char *s, size_t maxlen) {
+	size_t len = 0;
+	if (s != nullptr) {
+		while (maxlen-- > 0 && *s++ != '\0') {
+			len++;
+		}
+	}
+
+	return len;
+}
+
+extern "C" char * strcpy(char *dest, const char *src) {  //NOLINT
+	char *r = dest;
+	if (dest != nullptr && src != nullptr) {
+		while ((*dest++ = *src++) != '\0') {}
+	}
+	return r;
+}
+
+extern "C" char * strncpy(char *dest, const char *src, size_t n) {
+	char *r = dest;
+	if (dest != nullptr && src != nullptr) {
+		// copy at most n bytes, then pad the remainder (if any) with null bytes
+		while (n != 0 && *src != '\0') {
+			*dest++ = *src++;
+			n--;
+		}
+		while (n != 0) {
+			*dest++ = '\0';
+			n--;
+		}
+	}
+	return r;
+}
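+
+// Note: even in a freestanding kernel, the compiler assumes that memcpy, memmove,
+// memset and memcmp exist and may emit calls to them on its own, so they are
+// implemented here by hand.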
+extern "C" void* memcpy(void * __restrict__ dest, void const * __restrict__ src, size_t size) {
+	uint8_t *destination = reinterpret_cast<uint8_t *>(dest);
+	uint8_t const *source = (uint8_t const*)src;
+
+	for (size_t i = 0; i != size; ++i) {
+		destination[i] = source[i];
+	}
+
+	return dest;
+}
+
+extern "C" void* memmove(void * dest, void const * src, size_t size) {
+	uint8_t *destination = reinterpret_cast<uint8_t *>(dest);
+	uint8_t const *source = reinterpret_cast<uint8_t const *>(src);
+
+	if (source > destination) {
+		for (size_t i = 0; i != size; ++i) {
+			destination[i] = source[i];
+		}
+	} else {
+		for (size_t i = size; i != 0; --i) {
+			destination[i-1] = source[i-1];
+		}
+	}
+
+	return dest;
+}
+
+extern "C" void* memset(void *dest, int pattern, size_t size) {
+	uint8_t *destination = reinterpret_cast<uint8_t *>(dest);
+
+	for (size_t i = 0; i != size; ++i) {
+		destination[i] = static_cast<uint8_t>(pattern);
+	}
+
+	return dest;
+}
+
+extern "C" int memcmp(const void * s1, const void * s2, size_t n) {
+	const unsigned char * c1 = reinterpret_cast<const unsigned char *>(s1);
+	const unsigned char * c2 = reinterpret_cast<const unsigned char *>(s2);
+
+	for (size_t i = 0; i != n; ++i) {
+		if (c1[i] != c2[i]) {
+			return static_cast<int>(c1[i]) - static_cast<int>(c2[i]);
+		}
+	}
+
+	return 0;
+}
diff --git a/utils/string.h b/utils/string.h
new file mode 100644
index 0000000..3ea4580
--- /dev/null
+++ b/utils/string.h
@@ -0,0 +1,122 @@
+/*! \file
+ *  \brief General purpose \ref string "String functions"
+ */
+#pragma once
+
+#include "types.h"
+
+/*! \defgroup string String functions
+ *  \brief String functions as provided by `%string.h` in the C standard library
+ */
+
+/*! \brief Find the first occurrence of a character in a string
+ * \ingroup string
+ * \param s string to search in
+ * \param c character to find
+ * \return Pointer to first occurrence of the character
+ *         or to the null byte at the end of the string if not found
+ */
+extern "C" char *strchrnul(const char *s, int c);
+
+/*! \brief Find the first occurrence of a character in a string
+ * \ingroup string
+ * \param s string to search in
+ * \param c character to find
+ * \return Pointer to first occurrence of the character
+ *         or nullptr if not found
+ */
+extern "C" char *strchr(const char *s, int c);
+
+/*! \brief Compare two strings
+ * \ingroup string
+ * \param s1 first string
+ * \param s2 second string
+ * \return an integer less than, equal to, or greater than zero if the first string is found, respectively,
+ *         to be less than, to match, or to be greater than the second string
+ */
+extern "C" int strcmp(const char *s1, const char *s2);
+
+/*! \brief Compare two strings
+ * \ingroup string
+ * \param s1 first string
+ * \param s2 second string
+ * \param n number of bytes to compare
+ * \return an integer less than, equal to, or greater than zero if the given number of bytes of the first string are
+ *         found, respectively, to be less than, to match, or to be greater than the second string
+ */
+extern "C" int strncmp(const char *s1, const char *s2, size_t n);
+
+/*! \brief Calculate the length of a string
+ * \ingroup string
+ * \param s pointer to a string
+ * \return number of bytes in the string
+ */
+extern "C" size_t strlen(const char *s);
+
+/*! \brief Calculate the length of a string, limited by maxlen
+ * \ingroup string
+ * \param s pointer to a string
+ * \param maxlen upper limit of length to be returned
+ * \return number of bytes in the string, or maxlen -- whichever is smaller
+ */
+extern "C" size_t strnlen(const char *s, size_t maxlen);
+
+/*! \brief Copy the contents of a string
+ * including the terminating null byte (`\0`)
+ * \ingroup string
+ * \param dest destination string buffer
+ * \param src source string buffer
+ * \return a pointer to the destination string buffer
+ * \note Beware of buffer overruns!
+ */
+extern "C" char * strcpy(char * dest, const char * src);  //NOLINT
+
+/*! \brief Copy the contents of a string up to a maximum length
+ * or the terminating null byte (`\0`), whatever comes first.
+ * \ingroup string
+ * \param dest destination string buffer
+ * \param src source string buffer
+ * \param n maximum number of bytes to copy
+ * \return a pointer to the destination string buffer
+ * \note If there is no null byte (`\0`) among the first `n` bytes, the destination will not be null-terminated!
+ */
+extern "C" char * strncpy(char * dest, const char * src, size_t n);
+
+/*! \brief Copy a memory area
+ * \ingroup string
+ * \param dest destination buffer
+ * \param src source buffer
+ * \param size number of bytes to copy
+ * \return pointer to destination
+ * \note The memory areas must not overlap!
+ */
+extern "C" void* memcpy(void * __restrict__ dest, void const * __restrict__ src, size_t size);
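+
+/* Example (illustrative): shifting a buffer down by one byte involves overlapping
+ * areas, so memmove() has to be used:
+ *   memmove(buf, buf + 1, n - 1);   // well-defined
+ *   memcpy(buf, buf + 1, n - 1);    // not allowed -- source and destination overlap
+ */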
+
+/*! \brief Copy a memory area
+ * while the source may overlap with the destination
+ * \ingroup string
+ * \param dest destination buffer
+ * \param src source buffer
+ * \param size number of bytes to copy
+ * \return pointer to destination
+ */
+extern "C" void* memmove(void * dest, void const * src, size_t size);
+
+/*! \brief Fill a memory area with a pattern
+ * \ingroup string
+ * \param dest destination buffer
+ * \param pattern single byte pattern
+ * \param size number of bytes to fill with pattern
+ * \return pointer to destination
+ */
+extern "C" void* memset(void *dest, int pattern, size_t size);
+
+/*! \brief Compare two memory areas
+ * \ingroup string
+ * \param s1 first memory buffer
+ * \param s2 second memory buffer
+ * \param n number of bytes to compare
+ * \return an integer less than, equal to, or greater than zero if the first n bytes of s1 are found, respectively,
+ *         to be less than, to match, or to be greater than the first n bytes of s2.
+ */
+extern "C" int memcmp(const void * s1, const void * s2, size_t n);