更新时间:2024-09-03 17:00:40
本示例展示如何使用Edge Cloud Apps 函数为整个网站构建反向代理,并通过请求 URL、请求头、响应头和响应体的改写来实现无缝访问,结合流式改写和非流式改写两种方式,适用于将网站内容代理到另一个域名下并灵活修改,例如迁移旧网站内容到新域名或整合第三方网站内容。
示例中将 `www.gutenberg.org` 网站的内容代理到 `your-domain.com/gutenberg-proxy` 路径下,并对请求和响应进行了改写,以确保代理网站的正常访问。
- 请求头改写:改写 `Host`、`Referer` 和 `Origin` 字段,使其指向源站。
- 响应头改写:改写 `Set-Cookie` 和 `Location` 字段,使其适配代理域名。
- 响应体改写(按响应的 `Content-Type` 区分):
  - `text/html`: 使用 `HTMLRewriter` 对 HTML 标签中的 URL 进行改写。
  - `text/css`: 使用正则表达式对 CSS 文件中的 `url()` 函数进行改写。
  - `application/x-javascript` 和 `application/json`: 使用正则表达式对 JavaScript 和 JSON 文件中的 URL 进行改写。

const UPSTREAM = "www.gutenberg.org"; // 源站域名
const PATH_PREFIX = "/gutenberg-proxy"; // Path prefix on the proxy domain under which the origin site is served
const RW_START = "/--rw--"; // Opening marker of an origin host embedded in the path (dots encoded as "---")
const RW_STOP = "--wr--"; // Closing marker of an origin host embedded in the path
// Module-level state written by handleRequest() and read by rewriteURL().
// NOTE(review): both values are shared by every request handled by this module
// instance — confirm the runtime cannot interleave two requests across an await.
let g_origin_url; // URL object of the incoming (proxy-side) request
let g_path_pfrefix; // Prefix prepended to root-relative URLs ("pfrefix" is a typo; the name is referenced by other functions)
/**
 * Proxies one incoming request to the origin encoded in its URL and rewrites
 * the response so that links keep pointing at the proxy.
 *
 * Request side: the URL, Host, Referer and Origin are mapped back to the origin.
 * Response side: Set-Cookie domains and Location are mapped to the proxy; HTML
 * is rewritten through HTMLRewriter, CSS / JavaScript / JSON through regexes,
 * and every other content type is passed through unchanged.
 *
 * @param {Request} request - Incoming request addressed to the proxy domain.
 * @returns {Promise<Response>} The (possibly rewritten) origin response.
 */
async function handleRequest(request) {
  const proxyURL = new URL(request.url);
  // Map the proxy URL back to the origin URL and the path prefix it was served under.
  const {urlString, pathPrefix} = restoreURL(request.url);
  const url = new URL(urlString);
  let activePrefix = pathPrefix;

  // rewriteURL() reads these module-level variables. They are re-published
  // after every await because another request handled by the same module
  // instance may have overwritten them in the meantime.
  // NOTE(review): HTMLRewriter handlers run while the body streams, so the
  // HTML branch can still observe another request's values — passing the
  // state explicitly would be the complete fix.
  const publishState = () => {
    g_origin_url = proxyURL;
    g_path_pfrefix = activePrefix;
  };
  publishState();
  console.log("request.url", request.url);
  console.log("restore url", urlString);

  // Referer / Origin come from the client and may be unparseable
  // (e.g. "Origin: null"); a bad value must not fail the whole request.
  const tryRestore = (value) => {
    try {
      return restoreURL(value);
    } catch (e) {
      console.error("restore error", e, value);
      return null;
    }
  };

  // The request body is deliberately left unread: it is forwarded by the
  // Request constructor, and a body that was already consumed cannot be sent.
  const newRequest = new Request(urlString, request);
  const headers = newRequest.headers;
  // Special case: drop Accept-Encoding to avoid decompression failures on this
  // path. Done on the copy because incoming request headers may be immutable.
  if (url.pathname === "/browse/scores/top") {
    headers.delete("Accept-Encoding");
  }
  headers.set("Host", url.host); // Host must name the origin, not the proxy

  // Rewrite Referer so the origin sees its own URL.
  const referer = headers.get("Referer");
  if (referer) {
    const restored = tryRestore(referer);
    if (restored) {
      headers.set("Referer", restored.urlString);
      // Except for stylesheets, root-relative URLs in the response are
      // prefixed with the prefix of the referring page.
      if (!url.pathname.endsWith(".css")) {
        activePrefix = restored.pathPrefix;
      }
    }
  }

  // Rewrite Origin the same way.
  const origin = headers.get("Origin");
  if (origin && origin !== "null") {
    const restored = tryRestore(origin);
    if (restored) {
      headers.set("Origin", restored.urlString);
    }
  }

  // Redirects are handled manually so that Location can be rewritten below.
  const upstream = await fetch(newRequest, {cdnProxy: false, redirect: "manual"});
  publishState();

  // Statuses outside 200-599 (e.g. 101) cannot be re-wrapped in a new
  // Response; pass them through untouched.
  if (upstream.status < 200 || upstream.status > 599) {
    return upstream;
  }
  // Headers of a fetch() response are immutable in spec-compliant runtimes,
  // so all header edits are made on a mutable copy.
  const response = new Response(upstream.body, upstream);
  const responseHeaders = response.headers;

  // Point cookie Domain attributes at the proxy. hostname (not host) is used
  // because a cookie domain must not carry a port.
  const proxyHost = proxyURL.hostname;
  const fixCookieDomain = (cookie) =>
    cookie.replace(/(;\s*domain=)[^;]*/gi, (match, attr) => attr + proxyHost);
  if (typeof responseHeaders.getSetCookie === "function") {
    // Each Set-Cookie header is rewritten on its own; get() would join them
    // with ", " and set() would collapse them into one broken header.
    const cookies = responseHeaders.getSetCookie();
    if (cookies.length > 0) {
      responseHeaders.delete("Set-Cookie");
      for (const cookie of cookies) {
        responseHeaders.append("Set-Cookie", fixCookieDomain(cookie));
      }
    }
  } else {
    const cookie = responseHeaders.get("Set-Cookie");
    if (cookie) {
      responseHeaders.set("Set-Cookie", fixCookieDomain(cookie));
    }
  }

  // Keep redirects inside the proxy.
  const location = responseHeaders.get("Location");
  if (location) {
    responseHeaders.set("Location", rewriteURL(location));
  }

  // Reads the whole body for the regex-based branches. The rewritten text has
  // a different length, so the origin's Content-Length no longer applies.
  const readText = async () => {
    const text = await response.text();
    publishState();
    responseHeaders.delete("Content-Length");
    return text;
  };

  const contentType = getResponseHeader(response, "Content-Type");
  if (contentType.includes("text/html")) {
    // Streaming rewrite of URL-bearing attributes and inline text.
    return new HTMLRewriter()
      .on("a", new URLHandler(["href", "data-url", "data-verify-url"]))
      .on("link", new URLHandler(["href"]))
      .on("script", new URLHandler(["src"]))
      .on("iframe", new URLHandler(["src"]))
      .on("input", new URLHandler(["src"]))
      .on("div", new URLHandler(["style", "data-url", "data-status-url"]))
      .on("img", new URLHandler(["src", "data-origin"]))
      .on("form", new URLHandler(["action"]))
      .on("meta", new URLHandler(["content"]))
      .on("span", new URLHandler(["data-verify-url"]))
      .transform(response);
  }
  if (contentType.includes("text/css")) {
    let text = await readText();
    text = rewriteText(text, /url\((.*?)\)/g);
    return new Response(text, response);
  }
  if (contentType.includes("application/x-javascript")) {
    let text = await readText();
    // Un-escape "https:\/\/" first so the patterns below can match it.
    text = text.replace(/https:\\\/\\\//g, "https://");
    text = rewriteText(text, /'(\/j\/subject\/)'/g);
    text = rewriteText(text, /"https?:(\/\/.*?)"/gi);
    text = rewriteText(text, /'https?:(\/\/.*?)'/gi);
    text = rewriteText(text, /\.get\("(.*?)\"/g);
    return new Response(text, response);
  }
  if (contentType.includes("application/json")) {
    let text = await readText();
    text = rewriteText(text, /"https?:(\/\/.*?)"/gi);
    return new Response(text, response);
  }
  return response;
}
/**
 * Reads a response header in lower case.
 *
 * @param {Response} response - Response whose headers are inspected.
 * @param {string} headerName - Name of the header to read.
 * @returns {string} The lower-cased header value, or "" when it is absent or empty.
 */
function getResponseHeader(response, headerName) {
  const raw = response.headers.get(headerName);
  if (!raw) {
    return "";
  }
  return raw.toLowerCase();
}
/**
 * Rewrites every URL that `reg` captures inside `text`.
 *
 * For each match, the first capture group is taken as the URL, passed through
 * rewriteURL(), and substituted back into the match; an "https://" scheme in
 * the match is then downgraded to "http://".
 *
 * @param {string} text - Text (CSS, JavaScript, JSON, inline script, style attribute) to process.
 * @param {RegExp} reg - Pattern whose first capture group is the URL to rewrite.
 * @returns {string} The text with the captured URLs rewritten.
 */
function rewriteText(text, reg) {
  return text.replace(reg, (match, str) => {
    // A capture such as the one from url("...") still carries its quotes;
    // strip them so rewriteURL() sees the bare URL.
    const target = str.trim().replace(/^['"]|['"]$/g, "");
    // A function replacer keeps "$&", "$1", ... inside the rewritten URL
    // literal instead of expanding them as replacement patterns.
    const rewritten = match.replace(target, () => rewriteURL(target));
    // Only the scheme is downgraded; a bare "https" inside a path is left alone.
    return rewritten.replace("https://", "http://");
  });
}
/**
 * HTMLRewriter handler that rewrites URLs found in a configured list of
 * attributes and in the text content of the elements it is attached to.
 */
class URLHandler {
  /**
   * @param {string[]} attrs - Attribute names whose values should be rewritten.
   */
  constructor(attrs) {
    this.attrs = attrs;
  }

  /**
   * Rewrites quoted URLs inside a text chunk and replaces the chunk only when
   * something actually changed.
   *
   * @param {{text: string, replace: function(string): void}} text - Text chunk.
   */
  text(text) {
    const patterns = [
      /':?(\/\/.*?)'/g,
      /"https?:(\/\/.*?)"/gi,
      /'https?:(\/\/.*?)'/gi,
    ];
    const rewritten = patterns.reduce(
      (current, pattern) => rewriteText(current, pattern),
      text.text
    );
    if (rewritten !== text.text) {
      text.replace(rewritten);
    }
  }

  /**
   * Rewrites each configured attribute present on the element. The "style"
   * attribute is treated as CSS (url(...) references); every other attribute
   * is treated as a single URL.
   *
   * @param {{getAttribute: function(string): ?string, setAttribute: function(string, string): void}} element
   */
  element(element) {
    this.attrs.forEach((name) => {
      const before = element.getAttribute(name);
      if (!before) {
        return; // attribute missing or empty: nothing to rewrite
      }
      const after =
        name === "style"
          ? rewriteText(before, /url\((.*?)\)/g)
          : rewriteURL(before);
      if (after !== before) {
        element.setAttribute(name, after);
      }
    });
  }
}
/**
 * Converts an origin-side URL into the equivalent proxy-side URL.
 *
 * - Values that start with neither "/" nor "http" are returned unchanged.
 * - Root-relative paths get the current path prefix (g_path_pfrefix) prepended.
 * - Absolute and protocol-relative URLs get their host replaced by
 *   "<proxy host><PATH_PREFIX>"; a host other than UPSTREAM is additionally
 *   embedded in the path between RW_START and RW_STOP with dots encoded as "---".
 * - "https://" is downgraded to "http://".
 *
 * Reads the module-level g_origin_url and g_path_pfrefix.
 *
 * @param {string} originURL - URL as it appears in the origin's response.
 * @returns {string} The rewritten URL, or the input when it cannot be parsed.
 */
function rewriteURL(originURL) {
  if (!originURL.startsWith("/") && !originURL.startsWith("http")) {
    return originURL;
  }
  // Undo slash escaping: the HTML entity "&#x2F;" and the JSON form "\/".
  let target = originURL.replace(/&#x2F;/gi, "/").replace(/\\\//g, "/");
  if (target.startsWith("https://")) {
    target = target.replace("https://", "http://");
  }
  let fullURL = target;
  if (target.startsWith("//")) {
    // Protocol-relative: add a scheme only so that the URL can be parsed.
    fullURL = "http:" + target;
  } else if (target.startsWith("/")) {
    return g_path_pfrefix + target;
  }
  try {
    const url = new URL(fullURL);
    let host = "";
    if (url.host !== UPSTREAM) {
      host = `${RW_START}${url.host.replace(/\./g, "---")}${RW_STOP}`;
    }
    const rw = `${g_origin_url.host}${PATH_PREFIX}${host}`;
    // Function replacer: the replacement is inserted literally.
    return target.replace(url.host, () => rw);
  } catch (e) {
    console.error("rewriter error", e, target);
    return target;
  }
}
/**
 * Converts a proxy-side URL back into the origin-side URL.
 *
 * The scheme becomes "https:", the host becomes either UPSTREAM or the host
 * embedded in the path between RW_START and RW_STOP ("---" decoded to "."),
 * and the proxy path prefix is removed from the start of the path.
 *
 * NOTE(review): any host can be requested through the RW_START/RW_STOP
 * marker; consider an allow-list if the proxy must not reach arbitrary hosts.
 *
 * @param {string} rewritedURL - Absolute URL on the proxy domain.
 * @returns {{urlString: string, pathPrefix: string}} The origin URL and the
 *   proxy path prefix (PATH_PREFIX plus the host marker, if any) it maps to.
 * @throws {TypeError} When rewritedURL is not a parseable absolute URL.
 */
function restoreURL(rewritedURL) {
  // Make sure a URL that ends right at the prefix has a path separator.
  let proxied = rewritedURL;
  if (proxied.endsWith(PATH_PREFIX) || proxied.endsWith(RW_STOP)) {
    proxied += "/";
  }
  const url = new URL(proxied);
  let pathname = url.pathname;
  if (pathname.startsWith(PATH_PREFIX)) {
    pathname = pathname.substring(PATH_PREFIX.length);
  }

  let host = UPSTREAM;
  let pathPrefix = PATH_PREFIX;
  if (pathname.startsWith(RW_START) && pathname.includes(RW_STOP)) {
    const stop = pathname.indexOf(RW_STOP);
    pathPrefix = PATH_PREFIX + pathname.substring(0, stop + RW_STOP.length);
    host = pathname.substring(RW_START.length, stop).replace(/---/g, ".");
  }

  // Function replacers insert the new values literally ("$" is not special).
  let urlString = proxied
    .replace(url.protocol, () => "https:")
    .replace(url.host, () => host);
  // Strip the prefix only when the path really starts with it; otherwise an
  // occurrence elsewhere (e.g. in the query string) would be removed.
  if (url.pathname.startsWith(pathPrefix)) {
    urlString = urlString.replace(pathPrefix, "");
  }
  return {urlString, pathPrefix};
}
// Entry point: answer every fetch event with the proxied response.
addEventListener("fetch", function onFetch(event) {
  const pendingResponse = handleRequest(event.request);
  return event.respondWith(pendingResponse);
});