diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h index e41b3d51173d4..95b81f5f64c8f 100644 --- a/llvm/include/llvm/Object/ArchiveWriter.h +++ b/llvm/include/llvm/Object/ArchiveWriter.h @@ -48,25 +48,30 @@ enum class SymtabWritingMode { BigArchive64 // Only write the 64-bit symbol table. }; +void warnToStderr(Error Err); + // Write an archive directly to an output stream. Error writeArchiveToStream(raw_ostream &Out, ArrayRef NewMembers, SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, bool Deterministic, - bool Thin, std::optional IsEC = std::nullopt); + bool Thin, std::optional IsEC = std::nullopt, + function_ref Warn = warnToStderr); Error writeArchive(StringRef ArcName, ArrayRef NewMembers, SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, bool Deterministic, bool Thin, std::unique_ptr OldArchiveBuf = nullptr, - std::optional IsEC = std::nullopt); + std::optional IsEC = std::nullopt, + function_ref Warn = warnToStderr); // writeArchiveToBuffer is similar to writeArchive but returns the Archive in a // buffer instead of writing it out to a file. Expected> writeArchiveToBuffer(ArrayRef NewMembers, SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, - bool Deterministic, bool Thin); + bool Deterministic, bool Thin, + function_ref Warn = warnToStderr); } #endif diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index 34f12cf0111cf..114045561366d 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -482,7 +482,8 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind, } static Expected> -getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) { +getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context, + object::Archive::Kind Kind, function_ref Warn) { const file_magic Type = identify_magic(Buf.getBuffer()); // Don't attempt to read non-symbolic file types. if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) @@ -490,8 +491,36 @@ getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) { if (Type == file_magic::bitcode) { auto ObjOrErr = object::SymbolicFile::createSymbolicFile( Buf, file_magic::bitcode, &Context); - if (!ObjOrErr) - return ObjOrErr.takeError(); + // An error reading a bitcode file most likely indicates that the file + // was created by a compiler from the future. Normally we don't try to + // implement forwards compatibility for bitcode files, but when creating an + // archive we can implement best-effort forwards compatibility by treating + // the file as a blob and not creating symbol index entries for it. lld and + // mold ignore the archive symbol index, so provided that you use one of + // these linkers, LTO will work as long as lld or the gold plugin is newer + // than the compiler. We only ignore errors if the archive format is one + // that is supported by a linker that is known to ignore the index, + // otherwise there's no chance of this working so we may as well error out. + // We print a warning on read failure so that users of linkers that rely on + // the symbol index can diagnose the issue. + // + // This is the same behavior as GNU ar when the linker plugin returns an + // error when reading the input file. If the bitcode file is actually + // malformed, it will be diagnosed at link time. + if (!ObjOrErr) { + switch (Kind) { + case object::Archive::K_BSD: + case object::Archive::K_GNU: + case object::Archive::K_GNU64: + Warn(ObjOrErr.takeError()); + return nullptr; + case object::Archive::K_AIXBIG: + case object::Archive::K_COFF: + case object::Archive::K_DARWIN: + case object::Archive::K_DARWIN64: + return ObjOrErr.takeError(); + } + } return std::move(*ObjOrErr); } else { auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf); @@ -751,7 +780,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, object::Archive::Kind Kind, bool Thin, bool Deterministic, SymtabWritingMode NeedSymbols, SymMap *SymMap, LLVMContext &Context, ArrayRef NewMembers, - std::optional IsEC) { + std::optional IsEC, function_ref Warn) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; uint64_t MemHeadPadSize = 0; uint64_t Pos = @@ -819,8 +848,10 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) { for (const NewArchiveMember &M : NewMembers) { - Expected> SymFileOrErr = - getSymbolicFile(M.Buf->getMemBufferRef(), Context); + Expected> SymFileOrErr = getSymbolicFile( + M.Buf->getMemBufferRef(), Context, Kind, [&](Error Err) { + Warn(createFileError(M.MemberName, std::move(Err))); + }); if (!SymFileOrErr) return createFileError(M.MemberName, SymFileOrErr.takeError()); SymFiles.push_back(std::move(*SymFileOrErr)); @@ -1001,7 +1032,8 @@ Error writeArchiveToStream(raw_ostream &Out, ArrayRef NewMembers, SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, bool Deterministic, - bool Thin, std::optional IsEC) { + bool Thin, std::optional IsEC, + function_ref Warn) { assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); SmallString<0> SymNamesBuf; @@ -1023,7 +1055,7 @@ Error writeArchiveToStream(raw_ostream &Out, Expected> DataOrErr = computeMemberData( StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab, - isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC); + isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC, Warn); if (Error E = DataOrErr.takeError()) return E; std::vector &Data = *DataOrErr; @@ -1266,11 +1298,15 @@ Error writeArchiveToStream(raw_ostream &Out, return Error::success(); } +void warnToStderr(Error Err) { + llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "warning: "); +} + Error writeArchive(StringRef ArcName, ArrayRef NewMembers, SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, bool Deterministic, bool Thin, std::unique_ptr OldArchiveBuf, - std::optional IsEC) { + std::optional IsEC, function_ref Warn) { Expected Temp = sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); if (!Temp) @@ -1278,7 +1314,7 @@ Error writeArchive(StringRef ArcName, ArrayRef NewMembers, raw_fd_ostream Out(Temp->FD, false); if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind, - Deterministic, Thin, IsEC)) { + Deterministic, Thin, IsEC, Warn)) { if (Error DiscardError = Temp->discard()) return joinErrors(std::move(E), std::move(DiscardError)); return E; @@ -1302,12 +1338,14 @@ Error writeArchive(StringRef ArcName, ArrayRef NewMembers, Expected> writeArchiveToBuffer(ArrayRef NewMembers, SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, - bool Deterministic, bool Thin) { + bool Deterministic, bool Thin, + function_ref Warn) { SmallVector ArchiveBufferVector; raw_svector_ostream ArchiveStream(ArchiveBufferVector); - if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, - Kind, Deterministic, Thin, std::nullopt)) + if (Error E = + writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, Kind, + Deterministic, Thin, std::nullopt, Warn)) return std::move(E); return std::make_unique( diff --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test index a92762975bda6..021df7ed873d1 100644 --- a/llvm/test/Object/archive-malformed-object.test +++ b/llvm/test/Object/archive-malformed-object.test @@ -1,27 +1,51 @@ ## Show that the archive library emits error messages when adding malformed -## objects. +## object files and skips symbol tables for "malformed" bitcode files, which +## are assumed to be bitcode files generated by compilers from the future. # RUN: rm -rf %t.dir # RUN: split-file %s %t.dir # RUN: cd %t.dir -## Malformed bitcode object is the first file member of archive if the symbol table is required. +## Create a malformed bitcode object. # RUN: llvm-as input.ll -o input.bc # RUN: cp input.bc good.bc # RUN: %python -c "with open('input.bc', 'a') as f: f.truncate(10)" -# RUN: not llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 -## Malformed bitcode object is the last file member of archive if the symbol table is required. +## Malformed bitcode objects either warn or error depending on the archive format +## (see switch in getSymbolicFile). If the archive was created with a warning, +## we want to check that the archive map is empty. llvm-nm will fail when it +## tries to read the malformed bitcode file, but it's supposed to print the +## archive map first, which in this case it won't because there won't be one. # RUN: rm -rf bad.a -# RUN: not llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 +# RUN: llvm-ar --format=bsd rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1 +# RUN: not llvm-nm --print-armap bad.a | count 0 +# RUN: rm -rf bad.a +# RUN: llvm-ar --format=gnu rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1 +# RUN: not llvm-nm --print-armap bad.a | count 0 +# RUN: rm -rf bad.a +# RUN: not llvm-ar --format=bigarchive rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 +# RUN: rm -rf bad.a +# RUN: not llvm-ar --format=coff rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 +# RUN: rm -rf bad.a +# RUN: not llvm-ar --format=darwin rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 + +## Malformed bitcode object is the last file member of archive and +## the symbol table is required. In this case we check that the +## symbol table contains entries for the good object only. +# RUN: rm -rf bad.a +# RUN: llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=WARN1 +# RUN: not llvm-nm --print-armap bad.a | FileCheck %s --check-prefix=ARMAP ## Malformed bitcode object if the symbol table is not required for big archive. +## For big archives we print an error instead of a warning because the AIX linker +## presumably requires the index. # RUN: rm -rf bad.a # RUN: not llvm-ar --format=bigarchive rcS bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 # RUN: rm -rf bad.a # RUN: not llvm-ar --format=bigarchive rcS bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 # ERR1: error: bad.a: 'input.bc': Invalid bitcode signature +# WARN1: warning: 'input.bc': Invalid bitcode signature ## Non-bitcode malformed file. # RUN: yaml2obj input.yaml -o input.o @@ -29,17 +53,23 @@ # ERR2: error: bad.a: 'input.o': section header table goes past the end of the file: e_shoff = 0x9999 -## Don't emit an error if the symbol table is not required for formats other than the big archive format. -# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc +## Don't emit an error or warning if the symbol table is not required for formats other than the big archive format. +# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc 2>&1 | count 0 # RUN: llvm-ar t good.a | FileCheck %s --check-prefix=CONTENTS # CONTENTS: input.o # CONTENTS-NEXT: input.bc +# ARMAP: Archive map +# ARMAP-NEXT: foo in good.bc +# ARMAP-EMPTY: + #--- input.ll target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux" +@foo = global i32 1 + #--- input.yaml --- !ELF FileHeader: