// Copyright (C) 2006-2024 Free Software Foundation, Inc.

// merge.h -- handle section merging for gold -*- C++ -*-

// Copyright (C) 2006-2024 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <[email protected]>.

// This file is part of gold.

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
// MA 02110-1301, USA.

#ifndef GOLD_MERGE_H
#define GOLD_MERGE_H

#include <climits>
#include <map>
#include <vector>

#include "stringpool.h"
#include "output.h"

namespace gold
{

// For each object with merge sections, we store an Object_merge_map.
// This is used to map locations in input sections to a merged output
// section. The output section itself is not recorded here--it can be
// found in the output_sections_ field of the Object.

class Object_merge_map
{
public:
Object_merge_map()
: section_merge_maps_()
{ }

~Object_merge_map();

// Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
// + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
// output section. An OUTPUT_OFFSET of -1 means that the bytes are
// discarded. OUTPUT_OFFSET is relative to the start of the merged
// data in the output section.
void
add_mapping(const Output_section_data*, unsigned int shndx,
section_offset_type offset, section_size_type length,
section_offset_type output_offset);

// Get the output offset for an input address. MERGE_MAP is the map
// we are looking for, or NULL if we don't care. The input address
// is at offset OFFSET in section SHNDX. This sets *OUTPUT_OFFSET
// to the offset in the output section; this will be -1 if the bytes
// are not being copied to the output. This returns true if the
// mapping is known, false otherwise. *OUTPUT_OFFSET is relative to
// the start of the merged data in the output section.
bool
get_output_offset(unsigned int shndx,
section_offset_type offset,
section_offset_type* output_offset);

const Output_section_data*
find_merge_section(unsigned int shndx) const;

// Initialize an mapping from input offsets to output addresses for
// section SHNDX. STARTING_ADDRESS is the output address of the
// merged section.
template<int size>
void
initialize_input_to_output_map(
unsigned int shndx,
typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
Unordered_map<section_offset_type,
typename elfcpp::Elf_types<size>::Elf_Addr>*);

// Map input section offsets to a length and an output section
// offset. An output section offset of -1 means that this part of
// the input section is being discarded.
struct Input_merge_entry
{
// The offset in the input section.
section_offset_type input_offset;
// The length.
section_size_type length;
// The offset in the output section.
section_offset_type output_offset;
};

// A list of entries for a particular input section.
struct Input_merge_map
{
void add_mapping(section_offset_type input_offset, section_size_type length,
section_offset_type output_offset);

typedef std::vector<Input_merge_entry> Entries;

// We store these with the Relobj, and we look them up by input
// section. It is possible to have two different merge maps
// associated with a single output section. For example, this
// happens routinely with .rodata, when merged string constants
// and merged fixed size constants are both put into .rodata. The
// output offset that we store is not the offset from the start of
// the output section; it is the offset from the start of the
// merged data in the output section. That means that the caller
// is going to add the offset of the merged data within the output
// section, which means that the caller needs to know which set of
// merged data it found the entry in. So it's not enough to find
// this data based on the input section and the output section; we
// also have to find it based on a set of merged data in the
// output section. In order to verify that we are looking at the
// right data, we store a pointer to the Merge_map here, and we
// pass in a pointer when looking at the data. If we are asked to
// look up information for a different Merge_map, we report that
// we don't have it, rather than trying a lookup and returning an
// answer which will receive the wrong offset.
const Output_section_data* output_data;
// The list of mappings.
Entries entries;
// Whether the ENTRIES field is sorted by input_offset.
bool sorted;

Input_merge_map()
: output_data(NULL), entries(), sorted(true)
{ }
};

// Get or make the Input_merge_map to use for the section SHNDX
// with MERGE_MAP.
Input_merge_map*
get_or_make_input_merge_map(const Output_section_data* merge_map,
unsigned int shndx);

private:
// A less-than comparison routine for Input_merge_entry.
struct Input_merge_compare
{
bool
operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
{ return i1.input_offset < i2.input_offset; }
};

// Map input section indices to merge maps.
typedef std::vector<std::pair<unsigned int, Input_merge_map*> >
Section_merge_maps;

// Return a pointer to the Input_merge_map to use for the input
// section SHNDX, or NULL.
const Input_merge_map*
get_input_merge_map(unsigned int shndx) const;

Input_merge_map *
get_input_merge_map(unsigned int shndx) {
return const_cast<Input_merge_map *>(static_cast<const Object_merge_map *>(
this)->get_input_merge_map(shndx));
}

Section_merge_maps section_merge_maps_;
};

// A general class for SHF_MERGE data, to hold functions shared by
// fixed-size constant data and string data.

class Output_merge_base : public Output_section_data
{
public:
Output_merge_base(uint64_t entsize, uint64_t addralign)
: Output_section_data(addralign), entsize_(entsize),
keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
input_sections_()
{ }

// Return the entry size.
uint64_t
entsize() const
{ return this->entsize_; }

// Whether this is a merge string section. This is only true of
// Output_merge_string.
bool
is_string()
{ return this->do_is_string(); }

// Whether this keeps input sections.
bool
keeps_input_sections() const
{ return this->keeps_input_sections_; }

// Set the keeps-input-sections flag. This is virtual so that sub-classes
// can perform additional checks.
void
set_keeps_input_sections()
{ this->do_set_keeps_input_sections(); }

// Return the object of the first merged input section. This used
// for script processing. This is NULL if merge section is empty.
Relobj*
first_relobj() const
{ return this->first_relobj_; }

// Return the section index of the first merged input section. This
// is used for script processing. This is valid only if merge section
// is not valid.
unsigned int
first_shndx() const
{
gold_assert(this->first_relobj_ != NULL);
return this->first_shndx_;
}

// Set of merged input sections.
typedef Unordered_set<Section_id, Section_id_hash> Input_sections;

// Beginning of merged input sections.
Input_sections::const_iterator
input_sections_begin() const
{
gold_assert(this->keeps_input_sections_);
return this->input_sections_.begin();
}

// Beginning of merged input sections.
Input_sections::const_iterator
input_sections_end() const
{
gold_assert(this->keeps_input_sections_);
return this->input_sections_.end();
}

protected:
// Return the output offset for an input offset.
bool
do_output_offset(const Relobj* object, unsigned int shndx,
section_offset_type offset,
section_offset_type* poutput) const;

// This may be overridden by the child class.
virtual bool
do_is_string()
{ return false; }

// This may be overridden by the child class.
virtual void
do_set_keeps_input_sections()
{ this->keeps_input_sections_ = true; }

// Record the merged input section for script processing.
void
record_input_section(Relobj* relobj, unsigned int shndx);

private:
// The entry size. For fixed-size constants, this is the size of
// the constants. For strings, this is the size of a character.
uint64_t entsize_;
// Whether we keep input sections.
bool keeps_input_sections_;
// Object of the first merged input section. We use this for script
// processing.
Relobj* first_relobj_;
// Section index of the first merged input section.
unsigned int first_shndx_;
// Input sections. We only keep them is keeps_input_sections_ is true.
Input_sections input_sections_;
};

// Handle SHF_MERGE sections with fixed-size constant data.

class Output_merge_data : public Output_merge_base
{
public:
Output_merge_data(uint64_t entsize, uint64_t addralign)
: Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
input_count_(0),
hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
{ }

protected:
// Add an input section.
bool
do_add_input_section(Relobj* object, unsigned int shndx);

// Set the final data size.
void
set_final_data_size();

// Write the data to the file.
void
do_write(Output_file*);

// Write the data to a buffer.
void
do_write_to_buffer(unsigned char*);

// Write to a map file.
void
do_print_to_mapfile(Mapfile* mapfile) const
{ mapfile->print_output_data(this, _("** merge constants")); }

// Print merge stats to stderr.
void
do_print_merge_stats(const char* section_name);

// Set keeps-input-sections flag.
void
do_set_keeps_input_sections()
{
gold_assert(this->input_count_ == 0);
Output_merge_base::do_set_keeps_input_sections();
}

private:
// We build a hash table of the fixed-size constants. Each constant
// is stored as a pointer into the section data we are accumulating.

// A key in the hash table. This is an offset in the section
// contents we are building.
typedef section_offset_type Merge_data_key;

// Compute the hash code. To do this we need a pointer back to the
// object holding the data.
class Merge_data_hash
{
public:
Merge_data_hash(const Output_merge_data* pomd)
: pomd_(pomd)
{ }

size_t
operator()(Merge_data_key) const;

private:
const Output_merge_data* pomd_;
};

friend class Merge_data_hash;

// Compare two entries in the hash table for equality. To do this
// we need a pointer back to the object holding the data. Note that
// we now have a pointer to the object stored in two places in the
// hash table. Fixing this would require specializing the hash
// table, which would be hard to do portably.
class Merge_data_eq
{
public:
Merge_data_eq(const Output_merge_data* pomd)
: pomd_(pomd)
{ }

bool
operator()(Merge_data_key k1, Merge_data_key k2) const;

private:
const Output_merge_data* pomd_;
};

friend class Merge_data_eq;

// The type of the hash table.
typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
Merge_data_hashtable;

// Given a hash table key, which is just an offset into the section
// data, return a pointer to the corresponding constant.
const unsigned char*
constant(Merge_data_key k) const
{
gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
return this->p_ + k;
}

// Add a constant to the output.
void
add_constant(const unsigned char*);

// The accumulated data.
unsigned char* p_;
// The length of the accumulated data.
section_size_type len_;
// The size of the allocated buffer.
section_size_type alc_;
// The number of entries seen in input files.
size_t input_count_;
// The hash table.
Merge_data_hashtable hashtable_;
};

// Handle SHF_MERGE sections with string data. This is a template
// based on the type of the characters in the string.

template<typename Char_type>
class Output_merge_string : public Output_merge_base
{
public:
Output_merge_string(uint64_t addralign)
: Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
merged_strings_lists_(), input_count_(0), input_size_(0)
{
this->stringpool_.set_no_zero_null();
}

protected:
// Add an input section.
bool
do_add_input_section(Relobj* object, unsigned int shndx);

// Do all the final processing after the input sections are read in.
// Returns the final data size.
section_size_type
finalize_merged_data();

// Set the final data size.
void
set_final_data_size();

// Write the data to the file.
void
do_write(Output_file*);

// Write the data to a buffer.
void
do_write_to_buffer(unsigned char*);

// Write to a map file.
void
do_print_to_mapfile(Mapfile* mapfile) const
{ mapfile->print_output_data(this, _("** merge strings")); }

// Print merge stats to stderr.
void
do_print_merge_stats(const char* section_name);

// Writes the stringpool to a buffer.
void
stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
{ this->stringpool_.write_to_buffer(buffer, buffer_size); }

// Clears all the data in the stringpool, to save on memory.
void
clear_stringpool()
{ this->stringpool_.clear(); }

// Whether this is a merge string section.
virtual bool
do_is_string()
{ return true; }

// Set keeps-input-sections flag.
void
do_set_keeps_input_sections()
{
gold_assert(this->input_count_ == 0);
Output_merge_base::do_set_keeps_input_sections();
}

private:
// The name of the string type, for stats.
const char*
string_name();

// As we see input sections, we build a mapping from object, section
// index and offset to strings.
struct Merged_string
{
// The offset in the input section.
section_offset_type offset;
// The key in the Stringpool.
Stringpool::Key stringpool_key;

Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
: offset(offseta), stringpool_key(stringpool_keya)
{ }
};

typedef std::vector<Merged_string> Merged_strings;

struct Merged_strings_list
{
// The input object where the strings were found.
Relobj* object;
// The input section in the input object.
unsigned int shndx;
// The list of merged strings.
Merged_strings merged_strings;

Merged_strings_list(Relobj* objecta, unsigned int shndxa)
: object(objecta), shndx(shndxa), merged_strings()
{ }
};

typedef std::vector<Merged_strings_list*> Merged_strings_lists;

// As we see the strings, we add them to a Stringpool.
Stringpool_template<Char_type> stringpool_;
// Map from a location in an input object to an entry in the
// Stringpool.
Merged_strings_lists merged_strings_lists_;
// The number of entries seen in input files.
size_t input_count_;
// The total size of input sections.
size_t input_size_;
};

} // End namespace gold.

#endif // !defined(GOLD_MERGE_H)