clip

min と max が無事に 1 つのループに fuse されている
ParallelForkJoin は使われていない
 define void @cluster_0__XlaCompiledKernel_true__XlaNumConstantArgs_0__XlaNumResourceArgs_0_.v8(i8* nocapture align 8 dereferenceable(8) %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readonly %params, i8** noalias nocapture readonly %temps, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
 entry:
   %0 = bitcast i8** %params to [100 x [1024 x float]]**
   %arg0.untyped2 = load [100 x [1024 x float]]*, [100 x [1024 x float]]** %0, align 8, !invariant.load !0, !dereferenceable !1, !align !2
   %1 = load i8*, i8** %temps, align 8, !invariant.load !0, !dereferenceable !1, !align !2
   %fusion = bitcast i8* %1 to [100 x [1024 x float]]*
   br label %vector.ph
 
 vector.ph:                                        ; preds = %fusion.loop_exit.dim.1, %entry
   %fusion.invar_address.dim.0.05 = phi i64 [ 0, %entry ], [ %invar.inc, %fusion.loop_exit.dim.1 ]
   br label %vector.body
 
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %2 = getelementptr inbounds [100 x [1024 x float]], [100 x [1024 x float]]* %arg0.untyped2, i64 0, i64 %fusion.invar_address.dim.0.05, i64 %index
   %3 = bitcast float* %2 to <8 x float>*
   %wide.load = load <8 x float>, <8 x float>* %3, align 16, !invariant.load !0, !noalias !3
   %4 = getelementptr float, float* %2, i64 8
   %5 = bitcast float* %4 to <8 x float>*
   %wide.load10 = load <8 x float>, <8 x float>* %5, align 16, !invariant.load !0, !noalias !3
   %6 = getelementptr float, float* %2, i64 16
   %7 = bitcast float* %6 to <8 x float>*
   %wide.load11 = load <8 x float>, <8 x float>* %7, align 16, !invariant.load !0, !noalias !3
   %8 = getelementptr float, float* %2, i64 24
   %9 = bitcast float* %8 to <8 x float>*
   %wide.load12 = load <8 x float>, <8 x float>* %9, align 16, !invariant.load !0, !noalias !3
   %10 = fcmp fast ogt <8 x float> %wide.load, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
   %11 = fcmp fast ogt <8 x float> %wide.load10, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
   %12 = fcmp fast ogt <8 x float> %wide.load11, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
   %13 = fcmp fast ogt <8 x float> %wide.load12, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
   %14 = select <8 x i1> %10, <8 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <8 x float> %wide.load
   %15 = select <8 x i1> %11, <8 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <8 x float> %wide.load10
   %16 = select <8 x i1> %12, <8 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <8 x float> %wide.load11
   %17 = select <8 x i1> %13, <8 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <8 x float> %wide.load12
   %18 = fcmp fast olt <8 x float> %14, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %19 = fcmp fast olt <8 x float> %15, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %20 = fcmp fast olt <8 x float> %16, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %21 = fcmp fast olt <8 x float> %17, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %22 = select <8 x i1> %18, <8 x float> <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>, <8 x float> %14
   %23 = select <8 x i1> %19, <8 x float> <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>, <8 x float> %15
   %24 = select <8 x i1> %20, <8 x float> <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>, <8 x float> %16
   %25 = select <8 x i1> %21, <8 x float> <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>, <8 x float> %17
   %26 = getelementptr inbounds [100 x [1024 x float]], [100 x [1024 x float]]* %fusion, i64 0, i64 %fusion.invar_address.dim.0.05, i64 %index
   %27 = bitcast float* %26 to <8 x float>*
   store <8 x float> %22, <8 x float>* %27, align 16, !alias.scope !3, !noalias !6
   %28 = getelementptr float, float* %26, i64 8
   %29 = bitcast float* %28 to <8 x float>*
   store <8 x float> %23, <8 x float>* %29, align 16, !alias.scope !3, !noalias !6
   %30 = getelementptr float, float* %26, i64 16
   %31 = bitcast float* %30 to <8 x float>*
   store <8 x float> %24, <8 x float>* %31, align 16, !alias.scope !3, !noalias !6
   %32 = getelementptr float, float* %26, i64 24
   %33 = bitcast float* %32 to <8 x float>*
   store <8 x float> %25, <8 x float>* %33, align 16, !alias.scope !3, !noalias !6
   %index.next = add i64 %index, 32
   %34 = icmp eq i64 %index.next, 1024
   br i1 %34, label %fusion.loop_exit.dim.1, label %vector.body, !llvm.loop !8