public class StripEntities
extends java.lang.Object
#InsertEntities
,
#InsertFileEntities
,
StripEntities()
,
#StripFileEntities
Modifier and Type | Field and Description |
---|---|
private static boolean |
DEBUGGING
true to enable the testing code.
|
private static java.util.HashMap<java.lang.String,java.lang.Character> |
entityToChar
allows lookup by entity name, to get the corresponding char.
|
static int |
LONGEST_ENTITY
The longest an entity can be is 10 characters, at least in our tables, including the leading & and trailing ;.
|
static int |
SHORTEST_ENTITY
The shortest an entity can be is 4 characters, at least in our tables, including the leading & and
trailing ;.
|
private static java.lang.String[] |
spacingTags
tags, that when removed should leave a space behind.
|
static char |
UNICODE_NBSP_160_0x0a
Unicode nbsp (non-breaking space) char, decimal 160, hex 0xa0 (the "0x0a" in the constant's name is a misnomer).
|
Constructor and Description |
---|
StripEntities() |
Modifier and Type | Method and Description | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
static char |
bareHTMLEntityToChar(java.lang.String bareEntity,
char howToTranslateNbsp)
convert an entity to a single char.
|
||||||||||||||||||
static char |
entityToChar(java.lang.String entity)
Deprecated.
replaced with bareHTMLEntityToChar(String,char)
|
||||||||||||||||||
static java.lang.String |
flattenHTML(java.lang.String text,
char translateNbspTo)
strips tags and entities from HTML.
|
||||||||||||||||||
static java.lang.String |
flattenXML(java.lang.String text)
strips tags and entities from XML.
|
||||||||||||||||||
static void |
main(java.lang.String[] args)
Test harness.
|
||||||||||||||||||
protected static char |
possBareHTMLEntityWithSemicolonToChar(java.lang.String possBareEntityWithSemicolon,
char translateNbspTo)
Checks a number of gauntlet conditions to ensure this is a valid entity.
|
||||||||||||||||||
static char |
possEntityToChar(java.lang.String possBareEntityWithSemicolon)
Checks a number of gauntlet conditions to ensure this is a valid entity.
|
||||||||||||||||||
private static java.lang.String |
preStripIndividualTags(java.lang.String html)
Prepares tags for removal, to ensure they are replaced by a space
--> _ static java.lang.String stripEntities(java.lang.String text)
Deprecated.
use stripHTMLEntities or stripXMLEntities.
static java.lang.String stripHTMLEntities(java.lang.String text,
char translateNbspTo)
Converts HTML to text, converting entities such as &quot; back to " and &lt; back to <. Ordinary text passes
unchanged.
private static java.lang.String stripHTMLTagPairs(java.lang.String s)
remove all text between <applet..
static java.lang.String stripHTMLTags(java.lang.String html)
Removes tags from HTML leaving just the raw text.
private static java.lang.String stripIndividualTags(java.lang.String html)
Removes tags from HTML leaving just the raw text.
static java.lang.String stripNbsp(java.lang.String text)
Deprecated.
stripNbsp should no longer be necessary. stripHTMLEntities(String,char) now lets you specify directly the
translation of nbsp you want.
static java.lang.String stripTags(java.lang.String html)
Deprecated.
use stripHTMLTags or stripXMLTags instead.
static java.lang.String stripXMLEntities(java.lang.String text)
Converts XML to text, converting entities such as &quot; back to " and &lt; back to <. Ordinary text passes
unchanged.
static java.lang.String stripXMLTags(java.lang.String xml)
Removes tags from XML leaving just the raw text.
|
private static final boolean DEBUGGING
public static final char UNICODE_NBSP_160_0x0a
public static final int LONGEST_ENTITY
public static final int SHORTEST_ENTITY
private static final java.util.HashMap<java.lang.String,java.lang.Character> entityToChar
private static java.lang.String[] spacingTags
public static char bareHTMLEntityToChar(java.lang.String bareEntity, char howToTranslateNbsp)
bareEntity
- String entity to convert. Must have lead & and trail ; stripped; may have form: #x12ff or #123
or lt or nbsp style entity. Works faster if entity in lower case.
howToTranslateNbsp
- char you would like &nbsp; translated to, usually ' ' or (char) 160.
@Deprecated public static char entityToChar(java.lang.String entity)
entity
- String entity to convert. Must have lead & and trail ; stripped; may be a #x12ff or #123 style
entity. Works faster if entity in lower case.
See also: bareHTMLEntityToChar(String,char)
public static java.lang.String flattenHTML(java.lang.String text, char translateNbspTo)
text
- to flatten
translateNbspTo
- char you would like &nbsp; translated to, usually ' ' or (char) 160.
public static java.lang.String flattenXML(java.lang.String text)
text
- to flatten
public static char possEntityToChar(java.lang.String possBareEntityWithSemicolon)
possBareEntityWithSemicolon
- string that may hold an entity. Lead & must be stripped, but may optionally contain text past the ;
@Deprecated public static java.lang.String stripEntities(java.lang.String text)
text
- raw text to be processed. Must not be null.
See also: stripHTMLEntities(String,char)
public static java.lang.String stripHTMLEntities(java.lang.String text, char translateNbspTo)
text
- raw text to be processed. Must not be null.
translateNbspTo
- char you would like &nbsp; translated to, usually ' ' or (char) 160.
public static java.lang.String stripHTMLTags(java.lang.String html)
html
- input HTML
@Deprecated public static java.lang.String stripNbsp(java.lang.String text)
text
- Text to convert
See also: stripHTMLEntities(String,char)
@Deprecated public static java.lang.String stripTags(java.lang.String html)
html
- input HTML
public static java.lang.String stripXMLEntities(java.lang.String text)
text
- raw XML text to be processed. Must not be null.
public static java.lang.String stripXMLTags(java.lang.String xml)
xml
- input XML
protected static char possBareHTMLEntityWithSemicolonToChar(java.lang.String possBareEntityWithSemicolon, char translateNbspTo)
possBareEntityWithSemicolon
- string that may hold an entity. Lead & must be stripped, but may optionally contain text past the ;
translateNbspTo
- char you would like nbsp translated to, usually ' ' or (char) 160.
private static java.lang.String preStripIndividualTags(java.lang.String html)
html
- input HTML or XML
private static java.lang.String stripHTMLTagPairs(java.lang.String s)
s
- HTML string to strip tag pairs out of.
private static java.lang.String stripIndividualTags(java.lang.String html)
html
- input HTML or XML
public static void main(java.lang.String[] args)
args
- not used.