diff --git a/do_loop_with_local_and_local_init.f90 b/do_loop_with_local_and_local_init.f90 new file mode 100644 index 0000000000000..55642f7cb4024 --- /dev/null +++ b/do_loop_with_local_and_local_init.f90 @@ -0,0 +1,15 @@ +! For testing try: `flang -fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true do_loop_with_local_and_local_init.f90 -o test.mlir` + +! TODO Will be added as a proper test later. +subroutine omploop + implicit none + integer :: i, local_var, local_init_var + + do concurrent (i=1:10) local(local_var) local_init(local_init_var) + if (i < 5) then + local_var = 42 + else + local_init_var = 84 + end if + end do +end subroutine diff --git a/do_loop_with_local_and_local_init.mlir b/do_loop_with_local_and_local_init.mlir new file mode 100644 index 0000000000000..06510b4433f1a --- /dev/null +++ b/do_loop_with_local_and_local_init.mlir @@ -0,0 +1,49 @@ +// For testing: +// 1. parsing/printing (roundtripping): `fir-opt do_loop_with_local_and_local_init.mlir -o roundtrip.mlir` +// 2. Lowering locality specs during CFG: `fir-opt --cfg-conversion do_loop_with_local_and_local_init.mlir -o after_cfg_lowering.mlir` + +// TODO I will add both of the above steps as proper tests when the PoC is complete. 
+module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 21.0.0 (/home/kaergawy/git/aomp20.0/llvm-project/flang c8cf5a644886bb8dd3ad19be6e3b916ffcbd222c)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { + + omp.private {type = private} @local_privatizer : i32 + + omp.private {type = firstprivate} @local_init_privatizer : i32 copy { + ^bb0(%arg0: !fir.ref, %arg1: !fir.ref): + %0 = fir.load %arg0 : !fir.ref + fir.store %0 to %arg1 : !fir.ref + omp.yield(%arg1 : !fir.ref) + } + + func.func @_QPomploop() { + %0 = fir.alloca i32 {bindc_name = "i"} + %1:2 = hlfir.declare %0 {uniq_name = "_QFomploopEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomploopEi"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFomploopEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %4 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFomploopElocal_init_var"} + %5:2 = hlfir.declare %4 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %6 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFomploopElocal_var"} + %7:2 = hlfir.declare %6 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1_i32 = arith.constant 1 : i32 + %8 = fir.convert %c1_i32 : (i32) -> index 
+ %c10_i32 = arith.constant 10 : i32 + %9 = fir.convert %c10_i32 : (i32) -> index + %c1 = arith.constant 1 : index + fir.do_loop %arg0 = %8 to %9 step %c1 unordered private(@local_privatizer %7#0 -> %arg1, @local_init_privatizer %5#0 -> %arg2 : !fir.ref, !fir.ref) { + %10 = fir.convert %arg0 : (index) -> i32 + fir.store %10 to %1#1 : !fir.ref + %12:2 = hlfir.declare %arg1 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %14:2 = hlfir.declare %arg2 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %16 = fir.load %1#0 : !fir.ref + %c5_i32 = arith.constant 5 : i32 + %17 = arith.cmpi slt, %16, %c5_i32 : i32 + fir.if %17 { + %c42_i32 = arith.constant 42 : i32 + hlfir.assign %c42_i32 to %12#0 : i32, !fir.ref + } else { + %c84_i32 = arith.constant 84 : i32 + hlfir.assign %c84_i32 to %14#0 : i32, !fir.ref + } + } + return + } +} diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 1d1323642bf9c..81c220e29e164 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -348,6 +348,9 @@ class AbstractConverter { virtual Fortran::lower::SymbolBox lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + virtual Fortran::lower::SymbolBox + shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0; + /// Return the mlir::SymbolTable associated to the ModuleOp. 
/// Look-ups are faster using it than using module.lookup<>, /// but the module op should be queried in case of failure diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt index 73f388cbab6c9..da14fcd25a8d3 100644 --- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt +++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt @@ -16,8 +16,8 @@ mlir_tablegen(FIRAttr.cpp.inc -gen-attrdef-defs) set(LLVM_TARGET_DEFINITIONS FIROps.td) mlir_tablegen(FIROps.h.inc -gen-op-decls) mlir_tablegen(FIROps.cpp.inc -gen-op-defs) -mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls) -mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs) +mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls -typedefs-dialect=fir) +mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs -typedefs-dialect=fir) add_public_tablegen_target(FIROpsIncGen) set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 7147a2401baa7..073fe4994b567 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -16,6 +16,7 @@ include "mlir/Dialect/Arith/IR/ArithBase.td" include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" +include "mlir/Dialect/OpenMP/OpenMPClauses.td" include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td" include "flang/Optimizer/Dialect/FIRDialect.td" @@ -2226,7 +2227,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments, let hasVerifier = 1; let hasCustomAssemblyFormat = 1; - let arguments = (ins + defvar opArgs = (ins Index:$lowerBound, Index:$upperBound, Index:$step, @@ -2237,6 +2238,8 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments, OptionalAttr:$reduceAttrs, OptionalAttr:$loopAnnotation ); + + let arguments = !con(opArgs, OpenMP_PrivateClause.arguments); let results = 
(outs Variadic:$results); let regions = (region SizedRegion<1>:$region); @@ -2248,24 +2251,38 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments, CArg<"mlir::ValueRange", "std::nullopt">:$iterArgs, CArg<"mlir::ValueRange", "std::nullopt">:$reduceOperands, CArg<"llvm::ArrayRef", "{}">:$reduceAttrs, - CArg<"llvm::ArrayRef", "{}">:$attributes)> + CArg<"llvm::ArrayRef", "{}">:$attributes, + CArg<"mlir::ValueRange", "std::nullopt">:$private_vars, + CArg<"mlir::ArrayRef", "{}">:$private_syms + )> ]; - let extraClassDeclaration = [{ - mlir::Value getInductionVar() { return getBody()->getArgument(0); } + defvar opExtraClassDeclaration = [{ mlir::OpBuilder getBodyBuilder() { return mlir::OpBuilder(getBody(), std::prev(getBody()->end())); } + + /// Region argument accessors. + mlir::Value getInductionVar() { return getBody()->getArgument(0); } mlir::Block::BlockArgListType getRegionIterArgs() { - return getBody()->getArguments().drop_front(); + // 1 for skipping the induction variable. + return getBody()->getArguments().slice(1, getNumIterOperands()); + } + mlir::Block::BlockArgListType getRegionPrivateArgs() { + return getBody()->getArguments().slice(1 + getNumIterOperands(), + numPrivateBlockArgs()); } + + /// Operation operand accessors. 
mlir::Operation::operand_range getIterOperands() { return getOperands() - .drop_front(getNumControlOperands() + getNumReduceOperands()); + .slice(getNumControlOperands() + getNumReduceOperands(), + getNumIterOperands()); } llvm::MutableArrayRef getInitsMutable() { return getOperation()->getOpOperands() - .drop_front(getNumControlOperands() + getNumReduceOperands()); + .slice(getNumControlOperands() + getNumReduceOperands(), + getNumIterOperands()); } void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); } @@ -2274,7 +2291,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments, /// Number of region arguments for loop-carried values unsigned getNumRegionIterArgs() { - return getBody()->getNumArguments() - 1; + return getNumIterOperands(); } /// Number of operands controlling the loop: lb, ub, step unsigned getNumControlOperands() { return 3; } @@ -2313,6 +2330,10 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments, unsigned resultNum); mlir::Value blockArgToSourceOp(unsigned blockArgNum); }]; + + let extraClassDeclaration = + !strconcat(opExtraClassDeclaration, "\n", + OpenMP_PrivateClause.extraClassDeclaration); } def fir_IfOp : region_Op<"if", [DeclareOpInterfaceMethodsdetailsIf(); @@ -2036,11 +2055,27 @@ class FirConverter : public Fortran::lower::AbstractConverter { assign.u = Fortran::evaluate::Assignment::BoundsSpec{}; genAssignment(assign); } + for (const Fortran::semantics::Symbol *sym : info.sharedSymList) { const auto *hostDetails = sym->detailsIf(); copySymbolBinding(hostDetails->symbol(), *sym); } + + info.doLoop.getPrivateVarsMutable().assign(privateClauseOps.privateVars); + info.doLoop.setPrivateSymsAttr( + builder->getArrayAttr(privateClauseOps.privateSyms)); + + for (auto [sym, privateVar] : llvm::zip_equal( + dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) { + auto arg = info.doLoop.getRegion().begin()->addArgument( + privateVar.getType(), info.doLoop.getLoc()); + bindSymbol(*sym, 
hlfir::translateToExtendedValue( + privateVar.getLoc(), *builder, hlfir::Entity{arg}, + /*contiguousHint=*/true) + .first); + } + // Note that allocatable, types with ultimate components, and type // requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130), // so no clean-up needs to be generated for these entities. diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index b88454c45da85..fde3e49445bdd 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -53,6 +53,15 @@ DataSharingProcessor::DataSharingProcessor( }); } +DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, + lower::SymMap &symTable) + : DataSharingProcessor(converter, semaCtx, {}, eval, + /*shouldCollectPreDeterminedSymols=*/false, + useDelayedPrivatization, symTable) {} + void DataSharingProcessor::processStep1( mlir::omp::PrivateClauseOps *clauseOps) { collectSymbolsForPrivatization(); @@ -504,22 +513,28 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) { } } -void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, +void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize, mlir::omp::PrivateClauseOps *clauseOps) { if (!useDelayedPrivatization) { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + cloneSymbol(symToPrivatize); + copyFirstPrivateSymbol(symToPrivatize); return; } - lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); + const semantics::Symbol *sym = symToPrivatize->HasLocalLocality() + ? &symToPrivatize->GetUltimate() + : symToPrivatize; + lower::SymbolBox hsb = symToPrivatize->HasLocalLocality() + ? 
converter.shallowLookupSymbol(*sym) + : converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); hlfir::Entity entity{hsb.getAddr()}; bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds(); mlir::Location symLoc = hsb.getAddr().getLoc(); std::string privatizerName = sym->name().ToString() + ".privatizer"; - bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate); + bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate) || + sym->test(semantics::Symbol::Flag::LocalityLocalInit); mlir::Value privVal = hsb.getAddr(); mlir::Type allocType = privVal.getType(); @@ -645,6 +660,8 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, } symToPrivatizer[sym] = privatizerOp; + if (symToPrivatize->HasLocalLocality()) + allPrivatizedSymbols.insert(symToPrivatize); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 54a42fd199831..f5fef9f6dfe85 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -105,8 +105,6 @@ class DataSharingProcessor { void collectImplicitSymbols(); void collectPreDeterminedSymbols(); void privatize(mlir::omp::PrivateClauseOps *clauseOps); - void doPrivatize(const semantics::Symbol *sym, - mlir::omp::PrivateClauseOps *clauseOps); void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const semantics::Symbol *sym); @@ -125,6 +123,11 @@ class DataSharingProcessor { bool shouldCollectPreDeterminedSymbols, bool useDelayedPrivatization, lower::SymMap &symTable); + DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, lower::SymMap &symTable); + // Privatisation is split into two steps. 
// Step1 performs cloning of all privatisation clauses and copying for // firstprivates. Step1 is performed at the place where process/processStep1 @@ -151,6 +154,9 @@ class DataSharingProcessor { ? allPrivatizedSymbols.getArrayRef() : llvm::ArrayRef(); } + + void doPrivatize(const semantics::Symbol *sym, + mlir::omp::PrivateClauseOps *clauseOps); }; } // namespace omp diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 033d6453a619a..ab5bef9e93a5f 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -2603,14 +2603,16 @@ void fir::DoLoopOp::build(mlir::OpBuilder &builder, bool finalCountValue, mlir::ValueRange iterArgs, mlir::ValueRange reduceOperands, llvm::ArrayRef reduceAttrs, - llvm::ArrayRef attributes) { + llvm::ArrayRef attributes, + mlir::ValueRange privateVars, + mlir::ArrayRef privateSyms) { result.addOperands({lb, ub, step}); result.addOperands(reduceOperands); result.addOperands(iterArgs); result.addAttribute(getOperandSegmentSizeAttr(), builder.getDenseI32ArrayAttr( {1, 1, 1, static_cast(reduceOperands.size()), - static_cast(iterArgs.size())})); + static_cast(iterArgs.size()), 0})); if (finalCountValue) { result.addTypes(builder.getIndexType()); result.addAttribute(getFinalValueAttrName(result.name), @@ -2686,8 +2688,9 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser, // Parse the optional initial iteration arguments. 
llvm::SmallVector regionArgs; - llvm::SmallVector iterOperands; llvm::SmallVector argTypes; + + llvm::SmallVector iterOperands; bool prependCount = false; regionArgs.push_back(inductionVariable); @@ -2712,15 +2715,6 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser, prependCount = true; } - // Set the operandSegmentSizes attribute - result.addAttribute(getOperandSegmentSizeAttr(), - builder.getDenseI32ArrayAttr( - {1, 1, 1, static_cast(reduceOperands.size()), - static_cast(iterOperands.size())})); - - if (parser.parseOptionalAttrDictWithKeyword(result.attributes)) - return mlir::failure(); - // Induction variable. if (prependCount) result.addAttribute(DoLoopOp::getFinalValueAttrName(result.name), @@ -2729,15 +2723,77 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser, argTypes.push_back(indexType); // Loop carried variables argTypes.append(result.types.begin(), result.types.end()); - // Parse the body region. - auto *body = result.addRegion(); + if (regionArgs.size() != argTypes.size()) return parser.emitError( parser.getNameLoc(), "mismatch in number of loop-carried values and defined values"); + + llvm::SmallVector privateOperands; + if (succeeded(parser.parseOptionalKeyword("private"))) { + std::size_t oldArgTypesSize = argTypes.size(); + if (failed(parser.parseLParen())) + return mlir::failure(); + + llvm::SmallVector privateSymbolVec; + if (failed(parser.parseCommaSeparatedList([&]() { + if (failed(parser.parseAttribute(privateSymbolVec.emplace_back()))) + return mlir::failure(); + + if (parser.parseOperand(privateOperands.emplace_back()) || + parser.parseArrow() || + parser.parseArgument(regionArgs.emplace_back())) + return mlir::failure(); + + return mlir::success(); + }))) + return mlir::failure(); + + if (failed(parser.parseColon())) + return mlir::failure(); + + if (failed(parser.parseCommaSeparatedList([&]() { + if (failed(parser.parseType(argTypes.emplace_back()))) + return mlir::failure(); + + return mlir::success(); 
+ }))) + return mlir::failure(); + + if (regionArgs.size() != argTypes.size()) + return parser.emitError(parser.getNameLoc(), + "mismatch in number of private arg and types"); + + if (failed(parser.parseRParen())) + return mlir::failure(); + + for (auto operandType : llvm::zip_equal( + privateOperands, llvm::drop_begin(argTypes, oldArgTypesSize))) + if (parser.resolveOperand(std::get<0>(operandType), + std::get<1>(operandType), result.operands)) + return mlir::failure(); + + llvm::SmallVector symbolAttrs(privateSymbolVec.begin(), + privateSymbolVec.end()); + result.addAttribute(getPrivateSymsAttrName(result.name), + builder.getArrayAttr(symbolAttrs)); + } + + if (parser.parseOptionalAttrDictWithKeyword(result.attributes)) + return mlir::failure(); + + // Set the operandSegmentSizes attribute + result.addAttribute(getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr( + {1, 1, 1, static_cast(reduceOperands.size()), + static_cast(iterOperands.size()), + static_cast(privateOperands.size())})); + for (size_t i = 0, e = regionArgs.size(); i != e; ++i) regionArgs[i].type = argTypes[i]; + // Parse the body region. 
+ auto *body = result.addRegion(); if (parser.parseRegion(*body, regionArgs)) return mlir::failure(); @@ -2831,9 +2887,25 @@ void fir::DoLoopOp::print(mlir::OpAsmPrinter &p) { p << " -> " << getResultTypes(); printBlockTerminators = true; } - p.printOptionalAttrDictWithKeyword( - (*this)->getAttrs(), - {"unordered", "finalValue", "reduceAttrs", "operandSegmentSizes"}); + + if (numPrivateBlockArgs() > 0) { + p << " private("; + llvm::interleaveComma(llvm::zip_equal(getPrivateSymsAttr(), + getPrivateVars(), + getRegionPrivateArgs()), + p, [&](auto it) { + p << std::get<0>(it) << " " << std::get<1>(it) + << " -> " << std::get<2>(it); + }); + p << " : "; + llvm::interleaveComma(getPrivateVars(), p, + [&](auto it) { p << it.getType(); }); + p << ")"; + } + + p.printOptionalAttrDictWithKeyword((*this)->getAttrs(), + {"unordered", "finalValue", "reduceAttrs", + "operandSegmentSizes", "private_syms"}); p << ' '; p.printRegion(getRegion(), /*printEntryBlockArgs=*/false, printBlockTerminators); diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp index b09bbf6106dbb..a203a162a023e 100644 --- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp +++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp @@ -32,6 +32,19 @@ using namespace fir; using namespace mlir; namespace { +/// Looks up the privatizer named `symbolName`, searching from the operation +/// `from`, and returns it; asserts that the privatizer exists. +/// +/// TODO Copied from OpenMPToLLVMIRTranslation.cpp, move to a shared location. +/// Maybe a static function on the `PrivateClauseOp`. +static omp::PrivateClauseOp findPrivatizer(Operation *from, + SymbolRefAttr symbolName) { + omp::PrivateClauseOp privatizer = + SymbolTable::lookupNearestSymbolFrom(from, + symbolName); + assert(privatizer && "privatizer not found in the symbol table"); + return privatizer; +} // Conversion of fir control ops to more primitive control-flow. 
// @@ -57,6 +70,50 @@ class CfgLoopConv : public mlir::OpRewritePattern { auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get( rewriter.getContext(), flags); + // Handle privatization + if (!loop.getPrivateVars().empty()) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(&loop.getRegion().front()); + std::optional privateSyms = loop.getPrivateSyms(); + + for (auto [privateVar, privateArg, privatizerSym] : + llvm::zip_equal(loop.getPrivateVars(), loop.getRegionPrivateArgs(), + *privateSyms)) { + SymbolRefAttr privatizerName = llvm::cast(privatizerSym); + omp::PrivateClauseOp privatizer = findPrivatizer(loop, privatizerName); + + mlir::Value localAlloc = + rewriter.create(loop.getLoc(), privatizer.getType()); + + if (privatizer.getDataSharingType() == + omp::DataSharingClauseType::FirstPrivate) { + mlir::Block *beforeLocalInit = rewriter.getInsertionBlock(); + mlir::Block *afterLocalInit = rewriter.splitBlock( + rewriter.getInsertionBlock(), rewriter.getInsertionPoint()); + rewriter.cloneRegionBefore(privatizer.getCopyRegion(), + afterLocalInit); + mlir::Block *copyRegionFront = beforeLocalInit->getNextNode(); + mlir::Block *copyRegionBack = afterLocalInit->getPrevNode(); + + rewriter.setInsertionPoint(beforeLocalInit, beforeLocalInit->end()); + rewriter.create( + loc, copyRegionFront, + llvm::SmallVector{privateVar, privateArg}); + + rewriter.eraseOp(copyRegionBack->getTerminator()); + rewriter.setInsertionPoint(copyRegionBack, copyRegionBack->end()); + rewriter.create(loc, afterLocalInit); + } + + rewriter.replaceAllUsesWith(privateArg, localAlloc); + } + + loop.getRegion().front().eraseArguments(1 + loop.getNumRegionIterArgs(), + loop.numPrivateBlockArgs()); + loop.getPrivateVarsMutable().clear(); + loop.setPrivateSymsAttr(nullptr); + } + // Create the start and end blocks that will wrap the DoLoopOp with an // initalizer and an end point auto *initBlock = rewriter.getInsertionBlock();