Skip to content

Commit

Permalink
Add support for all ChatGPT crawlers
Browse files: browse the repository at this point in the history
  • Loading branch information
donatj committed Aug 28, 2024
1 parent b0ffbe4 commit 8781e03
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 6 deletions.
2 changes: 2 additions & 0 deletions src/UserAgent/Browsers.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,6 +25,7 @@ interface Browsers {
const GOOGLEBOT = 'Googlebot';
const GOOGLEBOT_IMAGE = 'Googlebot-Image';
const GOOGLEBOT_VIDEO = 'Googlebot-Video';
const GPTBOT = 'GPTBot';
const HEADLESSCHROME = 'HeadlessChrome';
const IEMOBILE = 'IEMobile';
const IMESSAGEBOT = 'iMessageBot';
Expand All @@ -36,6 +37,7 @@ interface Browsers {
const MSNBOT_MEDIA = 'msnbot-media';
const NETFRONT = 'NetFront';
const NINTENDOBROWSER = 'NintendoBrowser';
const OAI_SEARCHBOT = 'OAI-SearchBot';
const OCULUSBROWSER = 'OculusBrowser';
const OPERA = 'Opera';
const PUFFIN = 'Puffin';
Expand Down
16 changes: 10 additions & 6 deletions src/UserAgentParser.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,7 +90,7 @@ function parse_user_agent( $u_agent = null ) {
} elseif( $platform == 'Adr' ) {
$platform = 'Android';
} elseif( $platform === null ) {
if(preg_match_all('%(?P<platform>Android)[:/ ]%ix', $u_agent, $result)) {
if( preg_match_all('%(?P<platform>Android)[:/ ]%ix', $u_agent, $result) ) {
$platform = $result[PLATFORM][0];
}
}
Expand All @@ -99,7 +99,7 @@ function parse_user_agent( $u_agent = null ) {
%(?P<browser>Camino|Kindle(\ Fire)?|Firefox|Iceweasel|IceCat|Safari|MSIE|Trident|AppleWebKit|
TizenBrowser|(?:Headless)?Chrome|YaBrowser|Vivaldi|IEMobile|Opera|OPR|Silk|Midori|(?-i:Edge)|EdgA?|CriOS|UCBrowser|Puffin|
OculusBrowser|SamsungBrowser|SailfishBrowser|XiaoMi/MiuiBrowser|YaApp_Android|
Baiduspider|Applebot|Facebot|Googlebot|YandexBot|bingbot|Lynx|Version|Wget|curl|ChatGPT-User|
Baiduspider|Applebot|Facebot|Googlebot|YandexBot|bingbot|Lynx|Version|Wget|curl|ChatGPT-User|GPTBot|OAI-SearchBot|
Valve\ Steam\ Tenfoot|
NintendoBrowser|PLAYSTATION\ (?:\d|Vita)+)
\)?;?
Expand All @@ -126,7 +126,7 @@ function parse_user_agent( $u_agent = null ) {

$lowerBrowser = array_map('strtolower', $result[BROWSER]);

$find = function ( $search, &$key = null, &$value = null ) use ( $lowerBrowser ) {
$find = function( $search, &$key = null, &$value = null ) use ( $lowerBrowser ) {
$search = (array)$search;

foreach( $search as $val ) {
Expand All @@ -142,7 +142,7 @@ function parse_user_agent( $u_agent = null ) {
return false;
};

$findT = function ( array $search, &$key = null, &$value = null ) use ( $find ) {
$findT = function( array $search, &$key = null, &$value = null ) use ( $find ) {
$value2 = null;
if( $find(array_keys($search), $key, $value2) ) {
$value = $search[$value2];
Expand All @@ -169,7 +169,7 @@ function parse_user_agent( $u_agent = null ) {
} elseif( $find('NintendoBrowser', $key) || $platform == 'Nintendo 3DS' ) {
$browser = 'NintendoBrowser';
$version = $result[BROWSER_VERSION][$key];
} elseif( $find(['Kindle'], $key, $platform) ) {
} elseif( $find([ 'Kindle' ], $key, $platform) ) {
$browser = $result[BROWSER][$key];
$version = $result[BROWSER_VERSION][$key];
} elseif( $find('Opera', $key, $browser) ) {
Expand All @@ -188,7 +188,7 @@ function parse_user_agent( $u_agent = null ) {
}
}
}
} elseif( $find([ 'Applebot', 'IEMobile', 'Edge', 'Midori', 'Vivaldi', 'OculusBrowser', 'SamsungBrowser', 'Valve Steam Tenfoot', 'Chrome', 'HeadlessChrome', 'SailfishBrowser', 'ChatGPT-User' ], $key, $browser) ) {
} elseif( $find([ 'Applebot', 'IEMobile', 'Edge', 'Midori', 'Vivaldi', 'OculusBrowser', 'SamsungBrowser', 'Valve Steam Tenfoot', 'Chrome', 'HeadlessChrome', 'SailfishBrowser' ], $key, $browser) ) {
$version = $result[BROWSER_VERSION][$key];
} elseif( $rv_result && $find('Trident') ) {
$browser = 'MSIE';
Expand All @@ -203,6 +203,10 @@ function parse_user_agent( $u_agent = null ) {
$browser = 'BlackBerry Browser';
} elseif( $find('Safari', $key, $browser) || $find('TizenBrowser', $key, $browser) ) {
$version = $result[BROWSER_VERSION][$key];
} elseif( count($result[BROWSER]) ) {
$key = count($result[BROWSER]) - 1;
$browser = $result[BROWSER][$key];
$version = $result[BROWSER_VERSION][$key];
}

if( $find('Version', $key) ) {
Expand Down
15 changes: 15 additions & 0 deletions tests/user_agents.dist.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -1399,6 +1399,11 @@
"browser": "bingbot",
"version": "2.0"
},
"Mozilla\/5.0 AppleWebKit\/537.36 (KHTML, like Gecko); compatible; ChatGPT-User\/1.0; +https:\/\/openai.com\/bot": {
"platform": null,
"browser": "ChatGPT-User",
"version": "1.0"
},
"curl\/7.19.7 (universal-apple-darwin10.0) libcurl\/7.19.7 OpenSSL\/0.9.8r zlib\/1.2.3": {
"platform": null,
"browser": "curl",
Expand Down Expand Up @@ -1439,6 +1444,11 @@
"browser": "Googlebot-Video",
"version": "1.0"
},
"Mozilla\/5.0 AppleWebKit\/537.36 (KHTML, like Gecko); compatible; GPTBot\/1.1; +https:\/\/openai.com\/gptbot": {
"platform": null,
"browser": "GPTBot",
"version": "1.1"
},
"Lynx\/2.8.6rel.4 libwww-FM\/2.14 SSL-MM\/1.4.1 OpenSSL\/0.9.7l Lynxlet\/0.7.0": {
"platform": null,
"browser": "Lynx",
Expand All @@ -1449,6 +1459,11 @@
"browser": "msnbot-media",
"version": "1.1"
},
"Mozilla\/5.0 AppleWebKit\/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot\/1.0; +https:\/\/openai.com\/searchbot": {
"platform": null,
"browser": "OAI-SearchBot",
"version": "1.0"
},
"Slackbot 1.0 (+https:\/\/api.slack.com\/robots)": {
"platform": null,
"browser": "Slackbot",
Expand Down

0 comments on commit 8781e03

Please sign in to comment.