提交 ddc7fcb3 编写于 作者: 如梦技术's avatar 如梦技术 🐛

mica-http 添加 html、xml 转Bean.

上级 23f96540
......@@ -37,8 +37,8 @@ HttpRequest.get("https://www.baidu.com")
.execute() // 发起请求
.asJsonNode(); // 结果集转换,注:如果网络异常等会直接抛出异常。
// 同类的方法有 asString、asBytes、asStream
// json 类响应:asJsonNode、asObject、asList、asMap,采用 jackson 处理
// xml、html响应:asDocument,采用的 jsoup 处理
// json 类响应:asJsonNode、asValue、asList、asMap,采用 jackson 处理
// xml、html 响应:asDocument、asDomValue、asDomList,采用 jsoup 处理
// file 文件:toFile
// 同步
......
......@@ -2,5 +2,6 @@ dependencies {
api project(":mica-core")
api "com.squareup.okhttp3:okhttp:${okhttpVersion}"
api "com.squareup.okhttp3:logging-interceptor:${okhttpVersion}"
implementation "org.springframework.retry:spring-retry"
implementation "org.jsoup:jsoup:${jsoupVersion}"
}
/*
* Copyright (c) 2019-2029, Dreamlu (596392912@qq.com & www.dreamlu.net).
* <p>
* Licensed under the GNU LESSER GENERAL PUBLIC LICENSE 3.0;
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.gnu.org/licenses/lgpl.html
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.dreamlu.mica.http;
import java.lang.annotation.*;
/**
* Binds a bean field (or a bean type) to a CSS query that is evaluated against an xml/html DOM.
*
* <p>
* On a field, the query selects the element(s) whose value is exposed through the field's getter
* (see {@code CssQueryMethodInterceptor}). On a type, the query selects the elements that
* {@code DomMapper.readList} maps to one bean each.
* </p>
*
* @author L.cm
*/
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Documented
@Inherited
public @interface CssQuery {
/**
* The CSS query used to select the element(s).
*
* @return CssQuery
*/
String value();
/**
* Which part of the selected element is read.
*
* <p>
* As implemented by {@code CssQueryMethodInterceptor#getValue}:
* blank (the default): the element's outer html;
* html: the element's inner html;
* text: the text of the element's direct text nodes only;
* allText: the combined text of the element and its descendants;
* any other value: the value of the element attribute with that name.
* </p>
*
* @return attr
*/
String attr() default "";
/**
* Whether the field is a nested model (bean inside a bean); defaults to false.
*
* @return true if the field is a nested model
*/
boolean inner() default false;
}
/*
* Copyright (c) 2019-2029, Dreamlu (596392912@qq.com & www.dreamlu.net).
* <p>
* Licensed under the GNU LESSER GENERAL PUBLIC LICENSE 3.0;
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.gnu.org/licenses/lgpl.html
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.dreamlu.mica.http;
import lombok.RequiredArgsConstructor;
import net.dreamlu.mica.core.utils.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.jsoup.select.Selector;
import org.springframework.cglib.proxy.InvocationHandler;
import org.springframework.core.ResolvableType;
import org.springframework.util.ReflectionUtils;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * cglib invocation handler that answers the getters of a proxied bean by running the
 * {@link CssQuery} declared on the matching field against a jsoup {@link Element}.
 *
 * <p>
 * Only methods whose name starts with {@code get} are resolved from the DOM; everything else is
 * forwarded with {@code method.invoke}.
 * </p>
 *
 * @author L.cm
 */
@RequiredArgsConstructor
public class CssQueryMethodInterceptor implements InvocationHandler {
    /** Bean class being proxied; its fields carry the {@link CssQuery} annotations. */
    private final Class<?> clazz;
    /** Root element every CSS query of this proxy is evaluated against. */
    private final Element element;

    /**
     * Handles a call made on the proxy.
     *
     * @param object the proxy instance
     * @param method the invoked method
     * @param args   the call arguments
     * @return the value read from the DOM, or the result of the forwarded call
     * @throws Throwable whatever the forwarded call or the DOM lookup throws
     */
    @Override
    public Object invoke(Object object, Method method, Object[] args) throws Throwable {
        // toString/equals/hashCode cannot be answered accurately by the proxy, so fixed values are returned.
        if (ReflectionUtils.isToStringMethod(method)) {
            return clazz.toString();
        } else if (ReflectionUtils.isEqualsMethod(method)) {
            return false;
        } else if (ReflectionUtils.isHashCodeMethod(method)) {
            return 1;
        }
        // Only "get" accessors are resolved from the DOM ("is" accessors are not handled).
        String name = method.getName();
        if (!name.startsWith("get")) {
            // NOTE(review): object is the proxy itself, so this reflective call looks like it re-enters
            // this handler; a MethodInterceptor with MethodProxy#invokeSuper may be needed - confirm.
            return method.invoke(object, args);
        }
        // findField also searches superclasses and returns null (instead of throwing) when no field matches.
        Field field = ReflectionUtils.findField(clazz, StringUtil.firstCharToLower(name.substring(3)));
        CssQuery annotation = field == null ? null : field.getAnnotation(CssQuery.class);
        // No backing field or no annotation: not a DOM-bound property, do not proxy it.
        if (annotation == null) {
            return method.invoke(object, args);
        }
        Class<?> returnType = method.getReturnType();
        boolean isColl = Collection.class.isAssignableFrom(returnType);
        String cssQueryValue = annotation.value();
        // Nested bean (bean inside a bean)?
        boolean isInner = annotation.inner();
        if (isInner) {
            return proxyInner(cssQueryValue, method, returnType, isColl);
        }
        String attrName = annotation.attr();
        return proxyValue(cssQueryValue, attrName, returnType, isColl);
    }

    /**
     * Reads plain string value(s) for a getter.
     *
     * @return a collection with one value per matched element when the getter returns a collection;
     * otherwise the value of the first match, or {@code null} when nothing matches
     */
    private Object proxyValue(String cssQueryValue, String attrName, Class<?> returnType, boolean isColl) {
        if (isColl) {
            Elements elements = Selector.select(cssQueryValue, element);
            Collection<Object> valueList = newColl(returnType);
            for (Element select : elements) {
                valueList.add(getValue(select, attrName));
            }
            return valueList;
        }
        Element select = Selector.selectFirst(cssQueryValue, element);
        // selectFirst yields null when the query matches nothing; report "no value" instead of failing.
        return select == null ? null : getValue(select, attrName);
    }

    /**
     * Reads nested bean(s) for a getter annotated with {@code inner = true}.
     *
     * @return a collection of nested proxies (element type taken from the getter's first generic
     * argument) when the getter returns a collection; otherwise a proxy for the first match,
     * or {@code null} when nothing matches
     */
    private Object proxyInner(String cssQueryValue, Method method, Class<?> returnType, boolean isColl) {
        if (isColl) {
            Elements elements = Selector.select(cssQueryValue, element);
            Collection<Object> valueList = newColl(returnType);
            ResolvableType resolvableType = ResolvableType.forMethodReturnType(method);
            Class<?> innerType = resolvableType.getGeneric(0).resolve();
            for (Element select : elements) {
                valueList.add(DomMapper.readValue(select, innerType));
            }
            return valueList;
        }
        Element select = Selector.selectFirst(cssQueryValue, element);
        // Avoid building a proxy around a missing element; it would fail on first use.
        if (select == null) {
            return null;
        }
        return DomMapper.readValue(select, returnType);
    }

    /**
     * Extracts the requested part of an element: outer html when {@code attrName} is blank,
     * inner html for "html", direct text for "text", full text for "allText",
     * otherwise the attribute named {@code attrName}.
     */
    private String getValue(Element element, String attrName) {
        if (StringUtil.isBlank(attrName)) {
            return element.outerHtml();
        } else if ("html".equalsIgnoreCase(attrName)) {
            return element.html();
        } else if ("text".equalsIgnoreCase(attrName)) {
            return getText(element);
        } else if ("allText".equalsIgnoreCase(attrName)) {
            return element.text();
        } else {
            return element.attr(attrName);
        }
    }

    /** Joins the text of the element's direct {@link TextNode} children, ignoring nested elements. */
    private String getText(Element element) {
        return element.childNodes().stream()
            .filter(node -> node instanceof TextNode)
            .map(node -> (TextNode) node)
            .map(TextNode::text)
            .collect(Collectors.joining());
    }

    /** Creates a {@link HashSet} for Set-typed getters and an {@link ArrayList} for any other collection. */
    private Collection<Object> newColl(Class<?> returnType) {
        return Set.class.isAssignableFrom(returnType) ? new HashSet<>() : new ArrayList<>();
    }
}
/*
* Copyright (c) 2019-2029, Dreamlu (596392912@qq.com & www.dreamlu.net).
* <p>
* Licensed under the GNU LESSER GENERAL PUBLIC LICENSE 3.0;
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.gnu.org/licenses/lgpl.html
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.dreamlu.mica.http;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.cglib.proxy.Enhancer;
import java.util.ArrayList;
import java.util.List;
/**
 * Maps xml/html DOM elements to java beans, based on jsoup (intended for scraping).
 *
 * <p>
 * Beans are cglib subclasses whose calls are handled by {@link CssQueryMethodInterceptor}.
 * </p>
 *
 * @author L.cm
 */
public class DomMapper {

    /**
     * Reads xml/html content into a java bean.
     *
     * @param doc   xml element the bean's CSS queries are evaluated against
     * @param clazz bean class
     * @param <T>   bean type
     * @return a proxy instance of {@code clazz} backed by {@code doc}
     */
    @SuppressWarnings("unchecked") // Enhancer#create returns Object; the instance is a subclass of clazz
    public static <T> T readValue(final Element doc, final Class<T> clazz) {
        Enhancer enhancer = new Enhancer();
        enhancer.setSuperclass(clazz);
        enhancer.setUseCache(true);
        enhancer.setCallback(new CssQueryMethodInterceptor(clazz, doc));
        return (T) enhancer.create();
    }

    /**
     * Reads xml/html content into a list of java beans, one per element matched by the
     * {@link CssQuery} declared on the bean class.
     *
     * @param doc   xml element to search
     * @param clazz bean class, must be annotated with {@link CssQuery}
     * @param <T>   bean type
     * @return the mapped beans, in match order; empty when nothing matches
     * @throws IllegalArgumentException if {@code clazz} has no {@link CssQuery} annotation
     */
    public static <T> List<T> readList(Element doc, Class<T> clazz) {
        CssQuery annotation = clazz.getAnnotation(CssQuery.class);
        if (annotation == null) {
            throw new IllegalArgumentException("DomMapper readList " + clazz + " must have annotation @CssQuery.");
        }
        String cssQueryValue = annotation.value();
        Elements elements = doc.select(cssQueryValue);
        List<T> valueList = new ArrayList<>(elements.size());
        for (Element element : elements) {
            valueList.add(readValue(element, clazz));
        }
        return valueList;
    }
}
......@@ -46,6 +46,7 @@ public class HttpResponse implements ResponseSpec {
private final Request request;
private final Response response;
private final ResponseBody body;
@Nullable
private IOException exception;
HttpResponse(final Response response) {
......@@ -63,7 +64,7 @@ public class HttpResponse implements ResponseSpec {
private void checkIfException() {
if (exception != null) {
throw new RuntimeException(exception);
throw new MicaHttpException(exception);
}
}
......@@ -210,12 +211,12 @@ public class HttpResponse implements ResponseSpec {
}
@Override
public <T> T asObject(Class<T> valueType) {
public <T> T asValue(Class<T> valueType) {
return JsonUtil.readValue(this.asStream(), valueType);
}
@Override
public <T> T asObject(TypeReference<?> typeReference) {
public <T> T asValue(TypeReference<?> typeReference) {
return JsonUtil.readValue(this.asStream(), typeReference);
}
......@@ -243,6 +244,17 @@ public class HttpResponse implements ResponseSpec {
}
}
/**
* Parses the body with {@code asDocument()} and maps the resulting document to a bean
* through {@code DomMapper.readValue}.
*/
@Override
public <T> T asDomValue(Class<T> valueType) {
return DomMapper.readValue(this.asDocument(), valueType);
}
/**
* Parses the body with {@code asDocument()} and maps it to a list of beans through
* {@code DomMapper.readList}, which rejects a {@code valueType} without {@code @CssQuery}
* by throwing IllegalArgumentException.
*/
@Override
public <T> List<T> asDomList(Class<T> valueType) {
return DomMapper.readList(this.asDocument(), valueType);
}
@Override
public void toFile(File file) {
toFile(file.toPath());
......
/*
* Copyright (c) 2019-2029, Dreamlu (596392912@qq.com & www.dreamlu.net).
* <p>
* Licensed under the GNU LESSER GENERAL PUBLIC LICENSE 3.0;
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.gnu.org/licenses/lgpl.html
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.dreamlu.mica.http;
/**
 * Unchecked wrapper for http request failures.
 *
 * @author L.cm
 */
public class MicaHttpException extends RuntimeException {

    /**
     * Creates an exception with a message and no cause.
     *
     * @param message detail message
     */
    public MicaHttpException(String message) {
        super(message);
    }

    /**
     * Wraps a cause, reusing the cause's own message as this exception's message.
     *
     * @param cause the underlying failure; may be {@code null}, in which case both the message
     *              and the cause of this exception are {@code null}
     */
    public MicaHttpException(Throwable cause) {
        // RuntimeException accepts a null cause, so do not dereference it blindly.
        super(cause == null ? null : cause.getMessage(), cause);
    }
}
......@@ -110,7 +110,7 @@ public interface ResponseSpec {
* @param valueType value value type
* @return Object
*/
<T> T asObject(Class<T> valueType);
<T> T asValue(Class<T> valueType);
/**
* Returns body to Object.
......@@ -118,7 +118,7 @@ public interface ResponseSpec {
* @param typeReference value Type Reference
* @return Object
*/
<T> T asObject(TypeReference<?> typeReference);
<T> T asValue(TypeReference<?> typeReference);
/**
* Returns body to List.
......@@ -152,6 +152,24 @@ public interface ResponseSpec {
*/
Document asDocument();
/**
* Converts an xml/html body into an object.
*
* @param valueType object class
* @param <T> object type
* @return the object
*/
<T> T asDomValue(Class<T> valueType);
/**
* Converts an xml/html body into a list of objects.
*
* @param valueType object class
* @param <T> object type
* @return the list of objects
*/
<T> List<T> asDomList(Class<T> valueType);
/**
* toFile.
*
......
......@@ -16,13 +16,14 @@
package net.dreamlu.mica.http;
import net.dreamlu.mica.core.utils.ThreadUtil;
import okhttp3.Interceptor;
import okhttp3.Request;
import okhttp3.Response;
import org.springframework.retry.backoff.FixedBackOffPolicy;
import org.springframework.retry.policy.SimpleRetryPolicy;
import org.springframework.retry.support.RetryTemplate;
import java.io.IOException;
import java.util.concurrent.Callable;
/**
* 重试拦截器,应对代理问题
......@@ -39,51 +40,20 @@ public class RetryInterceptor implements Interceptor {
@Override
public Response intercept(Chain chain) throws IOException {
Request request = chain.request();
return retryOnException(retryPolicy, () -> chain.proceed(request));
RetryTemplate template = createRetryTemplate(retryPolicy);
return template.execute(context -> chain.proceed(request));
}
/**
* 在遇到异常时尝试重试
*
* @param retryPolicy 重试策略
* @param retryCallable 重试回调
* @param <V> 泛型
* @return V 结果
*/
private static <V> V retryOnException(RetryPolicy retryPolicy,
Callable<V> retryCallable) {
final int maxAttempts = retryPolicy.getMaxAttempts();
final long sleepMillis = retryPolicy.getSleepMillis();
Throwable lastException = null;
try {
for (int i = 0; i < maxAttempts; i++) {
try {
lastException = null;
return retryCallable.call();
} catch (Throwable e) {
lastException = e;
}
if (sleepMillis > 0) {
ThreadUtil.sleep(sleepMillis);
}
}
} catch (Throwable e) {
lastException = e;
}
if (lastException == null) {
throw new RetryException("Exception in retry");
}
throw new RetryException("Exception in retry", lastException);
}
public static class RetryException extends RuntimeException {
RetryException(String msg, Throwable cause) {
super(msg, cause);
}
RetryException(String message) {
super(message);
}
/**
 * Builds a spring-retry {@link RetryTemplate} from the given retry policy.
 *
 * @param policy supplies the maximum number of attempts and the sleep between attempts in milliseconds
 * @return a template that retries up to {@code policy.getMaxAttempts()} times and, only when
 * {@code policy.getSleepMillis()} is positive, waits that long between attempts
 */
private static RetryTemplate createRetryTemplate(RetryPolicy policy) {
    RetryTemplate template = new RetryTemplate();
    // Retry policy: bounded number of attempts.
    SimpleRetryPolicy retryPolicy = new SimpleRetryPolicy();
    retryPolicy.setMaxAttempts(policy.getMaxAttempts());
    template.setRetryPolicy(retryPolicy);
    // Back-off policy: only install a fixed delay when one was requested. FixedBackOffPolicy
    // does not treat a non-positive period as "no sleep" (spring-retry coerces it to 1 ms),
    // so for sleepMillis <= 0 the template's default back-off is left untouched.
    long sleepMillis = policy.getSleepMillis();
    if (sleepMillis > 0) {
        FixedBackOffPolicy backOffPolicy = new FixedBackOffPolicy();
        backOffPolicy.setBackOffPeriod(sleepMillis);
        template.setBackOffPolicy(backOffPolicy);
    }
    return template;
}
}
......@@ -18,6 +18,7 @@ package net.dreamlu.mica.http;
import lombok.Getter;
import lombok.ToString;
import org.springframework.retry.policy.SimpleRetryPolicy;
/**
* 重试策略
......@@ -27,17 +28,13 @@ import lombok.ToString;
@Getter
@ToString
public class RetryPolicy {
/**
* The default limit to the number of attempts for a new policy.
*/
public static final int DEFAULT_MAX_ATTEMPTS = 3;
public static final RetryPolicy INSTANCE = new RetryPolicy();
private final int maxAttempts;
private final long sleepMillis;
public RetryPolicy() {
this(RetryPolicy.DEFAULT_MAX_ATTEMPTS, 0L);
this(SimpleRetryPolicy.DEFAULT_MAX_ATTEMPTS, 0L);
}
public RetryPolicy(int maxAttempts, long sleepMillis) {
......
......@@ -16,13 +16,13 @@
package net.dreamlu;
import net.dreamlu.mica.http.MicaHttpException;
import net.dreamlu.mica.http.HttpRequest;
import net.dreamlu.mica.http.RetryInterceptor;
import org.junit.Test;
public class HttpRequestProxyTest {
@Test(expected = RetryInterceptor.RetryException.class)
@Test(expected = MicaHttpException.class)
public void test1() {
// 代理都不可用
HttpRequest.get("https://www.baidu.com")
......
package net.dreamlu;
import lombok.Getter;
import lombok.Setter;
import net.dreamlu.mica.http.CssQuery;
import java.util.List;
/**
* Sample page model mapped from html through {@link CssQuery}.
*/
@Getter
@Setter
public class Oschina {
// text of the element matched by "head > title"
@CssQuery(value = "head > title", attr = "text")
private String title;
// one nested VNews model per element matched by "#v_news .page a"
@CssQuery(value = "#v_news .page a", inner = true)
private List<VNews> vNews;
}
package net.dreamlu;
import net.dreamlu.mica.http.HttpRequest;
import java.util.List;
/**
 * Manual demo: fetches the oschina home page, maps it to {@link Oschina} and prints
 * the page title followed by one "title TAB href" line per news entry.
 */
public class OschinaTest {

    public static void main(String[] args) {
        // Synchronous call; the result is null when the request did not succeed.
        Oschina page = HttpRequest.get("https://www.oschina.net")
            .execute()
            .onSuccess(spec -> spec.asDomValue(Oschina.class));
        if (page != null) {
            System.out.println(page.getTitle());
            page.getVNews().forEach(
                item -> System.out.println(item.getTitle() + "\t" + item.getHref()));
        }
    }
}
package net.dreamlu;
import lombok.Getter;
import lombok.Setter;
import net.dreamlu.mica.http.CssQuery;
/**
* Sample nested model: one news link, with both values read from attributes of the
* element matched by "a".
*/
@Setter
@Getter
public class VNews {
// value of the "title" attribute
@CssQuery(value = "a", attr = "title")
private String title;
// value of the "href" attribute
@CssQuery(value = "a", attr = "href")
private String href;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册