经典算法：拉默-道格拉斯-普克算法

引言：本文主要介绍RDP算法。道格拉斯-普克算法 (Douglas–Peucker algorithm，亦称为拉默-道格拉斯-普克算法、迭代适应点算法、分裂与合并算法)是将曲线近似表示为一系列点，并减少点的数量的一种算法。它的优点是具有平移和旋转不变性，给定曲线与阈值后，抽样结果一定。

算法步骤

连接曲线首尾两点A、B形成一条直线AB；
2. 计算曲线上离该直线段距离最大的点C，计算其与AB的距离d；
比较该距离与预先给定的阈值threshold的大小，如果小于threshold，则以该直线作为曲线的近似，该段曲线处理完毕。
如果距离大于阈值，则用点C将曲线分为两段AC和BC，并分别对两段曲线进行步骤[1~3]的处理。
当所有曲线都处理完毕后，依次连接各个分割点形成折线，作为原曲线的近似。

实现

实现一

下面采用 C++实现，是一个DFS深搜的方法。

template<typename T>
void RDP(const std::vector<T>& in_pts, 
         std::vector<T> &out_pts,
         float epsilon = 0.1) {
    int size = in_pts.size();
    std::vector<bool> mask(size, false);
    std::pair<int, int> pts_pair(0, size-1);
    std::stack<std::pair<int, int>> stack;
    stack.push(pts_pair);
    
    while(!stack.empty()) {
        auto cur_pair = stack.top();
        stack.pop();
        
        float dmax = 0.;
        int index = 0;
        for (int i = cur_pair.first + 1; i < cur_pair.second; ++i) {
            float d = getPerpendicularDistance(in_pts[cur_pair.first], in_pts[cur_pair.second], in_pts[i]);
            if (d > dmax) {
                dmax = d;
                index = i;
            }
        }
        if (dmax > epsilon) {
            stack.push(std::pair<int, int>(cur_pair.first, i));
            stack.push(std::pair<int, int>(i, cur_pair.second));
        } else {
            mask[cur_pair.first] = true;
            mask[cur_pair.second] = true;
        }
    }
    
    out_pts.reserve(size);
    out_pts.clear();
    for (size_t i = 0; i < mask.size(); ++i) {
        if (mask[i]) {
            out_pts.emplace_back(in_pts[i]);
        }
    }
    out_pts.resize(out_pts.size());
}

getPerpendicularDistance()函数是求点到线的距离函数，有两种方法，如下

// 方法一：利用点到直线的距离公式
template<typename T>
inline float getPerpendicularDistance(const T &lineStart, const T &lineEnd, const T &pt) {
    double A, B, C, maxDist = 0;
    A = lineEnd.y - lineStart.y;
    B = lineStart.x - lineEnd.x;
    C = lineEnd.x * lineStart.y - lineStart.x * lineEnd.y;
    maxDist = fabs((A * pt.x + B * pt.y + C) / sqrt(A * A + B *B));
    return maxDist;
}

// 方法二：利用向量运算
template<typename T>
inline float getPerpendicularDistance(const T &lineStart, const T &lineEnd, const T &pt) {
    float dx = lineEnd.x - lineStart.x;
    float dy = lineEnd.y - lineStart.y;
    float mag = std::pow(std::pow(dx, 2.0) + std::pow(dy, 2.0), 0.5);
    if (mag > 0.0) {
        dx /= mag;
        dy /= mag;
    }

    float pvx = pt.x - lineStart.x;
    float pvy = pt.y - lineStart.y;
    
    float pvdot = dx * pvx + dy * pvy;

    float dsx = pvdot * dx;
    float dsy = pvdot * dy;

    float ax = pvx - dsx;
    float ay = pvy - dsy;

    return std::pow(std::pow(ax, 2.0) + std::pow(ay, 2.0), 0.5);
}

实现二

有时需要对polygon（闭合）的点数进行限制，抽取出固定数量的点，这种情况下，实现如下：

template<typename T>
inline void modifiedRDP(const std::vector<T> &in_pts,
                        std::vector<T> &out_pts,
                        int n_pts = 64) {
    if (static_cast<int>(in_pts.size()) <= n_pts) {
        out_pts = in_pts;
        return;
    }
    
    struct SimData {
        bool flag = true;
        int l_idx;
        int r_idx;
        float epsilon;
    };
    
    int cc = static_cast<int>(in_pts.size());
    std::vector<SimData> mask(in_pts.size());
    for (size_t i = 0; i < mask.size(); ++i) {
        auto &sim_data = mask[i];
        sim_data.l_idx = i - 1;
        sim_data.r_idx = i + 1;
        if (i == 0) {
            sim_data.l_idx = static_cast<int>(mask.size()) - 1;
        }
        if (static_cast<int>(i) == static_cast<int>(mask.size() - 1)) {
            sim_data.r_idx = 0;
        }
        sim_data.epsilon = getPerpendicularDistance(in_pts[i], in_pts[sim_data.l_idx], in_pts[sim_data.r_idx]);
    }
    
    auto comp = [](const SimData &i, const SimData &j) {
        return i.epsilon < j.epsilon;
    };
    while (cc > n_pts) {
        auto res = std::min_element(mask.begin(), mask.end(), comp);
        int idx = static_cast<int>(std::distance(mask.begin(), res));
        // 找到距离最小的点，标记并把其距离置为无穷大
        maks[idx].flag = false;
        mask[idx].epsilon = std::numeric_limits<float>::max();
        cc--;
        // 更新该点的左右邻居
        auto &l_mask = mask[res->l_idx];
        l_mask.r_idx = res->r_idx;
        l_mask.epsilon = getPerpendicularDistance(in_pts[res->l_idx], in_pts[l_mask.l_idx], in_pts[l_mask.r_idx]);
        auto &r_mask = mask[res->r_idx];
        r_mask.l_idx = res->l_idx;
        r_mask.epsilon = getPerpendicularDistance(in_pts[res->r_idx], in_pts[r_mask.l_idx], in_pts[r_mask.l_idx]);
    }
    
    out_pts.reserve(in_pts.size());
    out_pts.clear();
    for (size_t i = 0; i < mask.size(); ++i) {
        if (mask[i].flag) {
            out_pts.emplace_back(in_pts[i]);
        }
    }
    out_pts.resize(out_pts.size());
}

总结

本文主要总结了RDP算法的dfs实现，其原始版本是递归实现的，至于那个版本的实现更快，还需要进一步论述，但是在工程代码中，不推荐使用递归；点到直线的距离计算效率，也需要benchmark进行测试比较。