package com.bcxin.tenant.data.etc.tasks.jobs;
import com.bcxin.event.core.exceptions.BadEventException;
import com.bcxin.flink.streaming.cores.properties.CheckpointConfigProperty;
import com.bcxin.tenant.data.etc.tasks.components.*;
import com.bcxin.tenant.data.etc.tasks.properties.DataEtcConfigProperty;
import org.apache.commons.collections.CollectionUtils;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
import org.apache.flink.core.execution.JobClient;
import org.apache.flink.core.execution.JobListener;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.stream.Collectors;

public class DataEtcJob extends DataJobAbstract {
    private static final Logger logger = LoggerFactory.getLogger(DataEtcJob.class);

    private final Collection<DataEtcConfigProperty> configProperties;
    private final String primaryKeyName = "id";
    private final String env;
    private final String configFile;
    private final boolean isDebug;
    private final CheckpointConfigProperty checkpointConfigProperty;

    public DataEtcJob(CheckpointConfigProperty checkpointConfigProperty,
                      Collection<DataEtcConfigProperty> configProperties,
                      String env, String configFile, boolean isDebug) {
        super(configProperties, configFile, isDebug);
        this.checkpointConfigProperty = checkpointConfigProperty;
        this.configProperties = configProperties;
        this.env = env;
        this.configFile = configFile;
        this.isDebug = isDebug;
    }

    @Override
    protected void coreExecute() throws Exception {
        super.coreExecute();
        StreamExecutionEnvironment env = getStreamExecutionEnvironment(this.checkpointConfigProperty);

        KafkaRecordDeserializationSchema<BinlogRawValue> kafkaRecordDeserializationSchema = getDeserializationSchema();

        JdbcExecutionOptions jdbcExecutionOptions =
                JdbcExecutionOptions.builder()
                        .withBatchSize(500)
                        .withBatchIntervalMs(60_000)
                        .withMaxRetries(3)
                        .build();

        for (DataEtcConfigProperty config : configProperties) {
            buildConfigSourceAndSink(env, kafkaRecordDeserializationSchema, jdbcExecutionOptions, config);
        }

        String title = configProperties.stream()
                .map(ix -> ix.getTitle()).distinct()
                .collect(Collectors.joining(";"));
        env.registerJobListener(new JobListener() {
            @Override
            public void onJobSubmitted(@Nullable JobClient jobClient, @Nullable Throwable throwable) {
                if (jobClient != null) {
                    logger.error("Job以及提交到服务端: jobId={};status={};", jobClient.getJobID(), jobClient.getJobStatus());
                }
            }

            @Override
            public void onJobExecuted(@Nullable JobExecutionResult jobExecutionResult, @Nullable Throwable throwable) {
                if (jobExecutionResult != null) {
                    logger.error("Job以及提交到服务端: jobId={};JobExecutionResult={};",
                            jobExecutionResult.getJobID(),
                            jobExecutionResult.isJobExecutionResult()
                    );
                }
            }
        });

        env.execute(String.format("job-v18.3:%s-环境-%s-%s", this.env, title, configFile));
    }

    private void buildConfigSourceAndSink(StreamExecutionEnvironment env,
                                          KafkaRecordDeserializationSchema<BinlogRawValue> kafkaRecordDeserializationSchema,
                                          JdbcExecutionOptions jdbcExecutionOptions,
                                          DataEtcConfigProperty config) {
        if (config == null || CollectionUtils.isEmpty(config.getTopicSubscribers())) {
            throw new BadEventException("无效KafkaTopic配置数据信息");
        }

        Collection<DataEtcConfigProperty.TmpMergedKafkaConnectionTopicInfo>
                kafkaConnectionTopicInfos = getMergedKafkaConnections(config);

        kafkaConnectionTopicInfos.forEach(kti -> {
            Collection<DataEtcConfigProperty.TopicSubscriberConfigProperty> selectedTopicSubscriberConfigs =
                    config.getTopicSubscribers().stream()
                            .filter(ix -> ix.getRefKafkaName().equalsIgnoreCase(kti.getKafkaConnection().getName()) &&
                                    kti.getTopics().stream().anyMatch(ii -> ii.equalsIgnoreCase(ix.getTopic()))
                            ).collect(Collectors.toList());

            if (CollectionUtils.isEmpty(selectedTopicSubscriberConfigs)) {
                throw new BadEventException("无效主题配置");
            }

            DataEtcConfigProperty.TopicSubscriberConfigProperty selectedSubscriberConfig
                    = selectedTopicSubscriberConfigs.stream().findFirst().get();
            if (selectedTopicSubscriberConfigs.size() > 1) {
                throw new BadEventException(String.format("同一个主题(%s)在一个文件中必须只能只有一个主题订阅",
                        selectedSubscriberConfig.getTopic()));
            }

            String[] selectedTopics = kti.getTopics().toArray(new String[kti.getTopics().size()]);
            String groupId = String.format("g_%s_%s", config.getGroupId(), selectedSubscriberConfig.getUid());
            String clientId = String.format("cli_%s_%s", config.getGroupId(), selectedSubscriberConfig.getUid());
            String sourceUid = String.format("uid_%s_%s", this.configFile, groupId);

            int receiveBufferConfig = 5 * 1024;
            if ("others".equalsIgnoreCase(this.configFile)) {
                receiveBufferConfig = 512;
            }

            logger.error("init with configFile={} receiveBufferConfig={};", this.configFile, receiveBufferConfig);
            KafkaSource<BinlogRawValue> binLogKafkaSource =
                    KafkaSource.<BinlogRawValue>builder()
                            .setBootstrapServers(kti.getKafkaConnection().getBootstrapServer())
                            //调整策略之后，有checkpoint的情况，使用checkpoint, 没有的化, 先按照最新的来消费，丢失的情况, 由人工执行
                            .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.LATEST))
                            .setGroupId(groupId)
                            .setClientIdPrefix(clientId)
                            .setTopics(selectedTopics)
                            .setDeserializer(kafkaRecordDeserializationSchema)
                            .setProperty(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "120000")
                            .setProperty(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, "300000")
                            /**
                             * 这个一定要关闭
                             */
                            .setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")
                            .setProperty(ConsumerConfig.FETCH_MAX_BYTES_CONFIG, String.valueOf(20 * 1024 * 1024))
                            .setProperty(ConsumerConfig.RECEIVE_BUFFER_CONFIG, String.valueOf(receiveBufferConfig))
                            //如果用户需要精细监控消费延迟，应该开启；如果是短周期批处理作业（如用Flink批模式跑小时任务），开启反而增加Kafka压力，可以关闭。
                            .setProperty("commit.offsets.on.checkpoint", "false")
                            .build();

            WatermarkStrategy watermarkStrategy =
                    WatermarkStrategy.forBoundedOutOfOrderness(Duration.of(5, ChronoUnit.MINUTES))
                            .withTimestampAssigner((event, timestamp) -> {
                                BinlogRawValue rawValue = (BinlogRawValue) event;

                                Date lastSyncVersion = rawValue.getLastSyncVersion();
                                return lastSyncVersion.getTime();
                            })
                            /**
                             * 在多并行度任务中，Flink 的全局 Watermark
                             * 由所有子任务 Watermark 的最小值决定。若某一分区长时间无数据（如 Kafka 某分区无消息），
                             * 其 Watermark 会停滞，导致全局 Watermark 无法推进，进而使依赖 Watermark 的窗口计算、双流 JOIN 等操作被阻塞
                             */
                            .withIdleness(Duration.ofSeconds(10));

            SingleOutputStreamOperator<BinlogRawValue> debeziumJsonNodeDtoKeyedStream
                    = env
                    .fromSource(binLogKafkaSource, watermarkStrategy, String.format("订阅v2-%s", selectedSubscriberConfig.getTopic()))
                    .setParallelism(5) // Source 并行度并非必须大于 Kafka 分区数，反而应遵循等于或匹配分区数的原则
                    .uid(sourceUid);

            buildJdbcSubscriber(
                    kti.getKafkaConnection().getBootstrapServer(),
                    debeziumJsonNodeDtoKeyedStream, jdbcExecutionOptions, config,
                    selectedSubscriberConfig.getSubscriberContents(),
                    sourceUid
            );
        });
    }
}
