tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
if (consumer->opcode() == HloOpcode::kDot) {
  // In the general case we call out to optimized "black box" GEMM routines
  // for Dot, which precludes fusion. However, in very specific cases, we try
  // to fuse Dot operations by generating an elemental dot implementation.
  //
  // We need to be careful and conservative here since any benefit we get from
  // fusion can easily be overshadowed by the overhead of a naive GEMM
  // algorithm in the IR.